diff --git a/.gitattributes b/.gitattributes index e54b439bff318788c17727816f1771c271584d79..f05d2693f734810f52cf654efc52e63b95099f1c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8717,3 +8717,248 @@ neuronxcc-2.17.194.0+d312836f/MODULE_733b16d0b3a2314f4e14+165e9558/wrapped_neff. neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_37e0946a0c24edac473c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_431f4323665d4b4d39f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json new file mode 100644 index 0000000000000000000000000000000000000000..2edcdf26f9a27207f116fe0e6410d67805c97a0d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0/llama/meta-llama/Llama-3.2-1B-Instruct/e50437e463c4cd2fb8eb.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2ebfc997e5a074741c744fc7bb41a8f9ed4e9d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/phi3/microsoft/Phi-3-mini-4k-instruct/1cab6edbf167cfd815cf.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json new file mode 100644 index 0000000000000000000000000000000000000000..06847ba9d7b294919def1dea05faa6f5887c32c9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/211d2bf85194cf8d9207.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-1.7B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-1.7B", + "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json new file mode 100644 index 0000000000000000000000000000000000000000..c03826e53ce5ed12960558376f7a60bb1481aff6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev0/qwen3/Qwen/Qwen3-1.7B/f6dbd12c1a06eb5a2084.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-1.7B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-1.7B", + "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev0", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json new file mode 100644 index 0000000000000000000000000000000000000000..d275807cdbfb870a0be53266909b4296b27a64a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/16eb552455637c961181.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json new file mode 100644 index 0000000000000000000000000000000000000000..00eb3567442b2cfd634e41a17799aaecf7ea764c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/98a5b36eff78463d521e.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json new file mode 100644 index 0000000000000000000000000000000000000000..e570ab13e6bb7e702a8883bdb57fbd286231eeb9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b3f4b03f5c98af7258c7.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json new file mode 100644 index 0000000000000000000000000000000000000000..85952b928cf206bb293a37c628cb5d451cd451f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/0563184c338261c6fbaa.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json new file mode 100644 index 0000000000000000000000000000000000000000..f9db0fe2b47988867e19cfa9969e63566683b31c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/698ede202023fad6e4ac.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json new file mode 100644 index 0000000000000000000000000000000000000000..fb6b0f8cb492cd8e7993b54ac0190e949c9c53b1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/6c659b1c4f864a345f17.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json new file mode 100644 index 0000000000000000000000000000000000000000..e3eb8c6309a6a35242846f53b978e9fd8fd5244a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/bb0f60069cb5e089f6e4.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json new file mode 100644 index 0000000000000000000000000000000000000000..ed3b38c98d9e1b387b66962b48b9690a173ee94e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.1-2b-instruct/d1f56a608fd1f85f24f1.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json new file mode 100644 index 0000000000000000000000000000000000000000..41994e12ef5d0f71f9b7a3da3a0390d1cb5c6606 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/granite/ibm-granite/granite-3.3-8b-instruct/8e67447ff0fe199668d6.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.3-8b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 0.0078125, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 12800, + "logits_scaling": 16.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "ibm-granite/granite-3.3-8b-instruct", + "checkpoint_revision": "51dd4bc2ade4059a6bd87649d68aa11e4fb2529b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49159 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json new file mode 100644 index 0000000000000000000000000000000000000000..e2199f4bcd5afceef8b436b0604458d7ffa940d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/23870c03582a624b981f.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json new file mode 100644 index 0000000000000000000000000000000000000000..d06d7489fd2f02e68ccc3db3bc42dc9aab412dab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/38c497769b1d1cbd7c0d.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json new file mode 100644 index 0000000000000000000000000000000000000000..01bdfd8810bccfc48298fe2d8748a300be7808b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/3f83ce0c2e5f27f6fa2d.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json new file mode 100644 index 0000000000000000000000000000000000000000..a264242c960e5343432f086a620fa49f1847e6ec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/8dcd6598dcebb27ef470.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json new file mode 100644 index 0000000000000000000000000000000000000000..1a16723daefaa5f854b0ffaf693f91336bf4590e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/b9624072379e00f37909.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json new file mode 100644 index 0000000000000000000000000000000000000000..985951ec31d99688d6a9f2f3088913292858a408 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/llamafactory/tiny-random-Llama-3/cfce0a36a7aad541df51.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json new file mode 100644 index 0000000000000000000000000000000000000000..6c4d11132e576f710384e40866d2306385507476 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/38a5aecfa62be8b081c0.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json new file mode 100644 index 0000000000000000000000000000000000000000..b37c012b8de4311564d312e0db455b81aa1ce41c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/llama/unsloth/Llama-3.2-1B-Instruct/bece693cb5ff2eaedc7d.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json new file mode 100644 index 0000000000000000000000000000000000000000..4cef32eafadc72598528fee29ffc8bd1d3ed53e4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/1324c0afc0fb590822ad.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json new file mode 100644 index 0000000000000000000000000000000000000000..bc22200f7902327a730a4cdba88cd1463733255a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/3c5f98b57fbf4eed7011.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json new file mode 100644 index 0000000000000000000000000000000000000000..5d8ded2e0cadb34394415461281d23f65bad2f91 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/mixtral/dacorvo/Mixtral-tiny/e50ed7102c39809e27ac.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json new file mode 100644 index 0000000000000000000000000000000000000000..74db402beea94e500cb3f4a8f77b9660e0165818 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/microsoft/Phi-3-mini-4k-instruct/6d5db110aa4df2b11b8a.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json new file mode 100644 index 0000000000000000000000000000000000000000..51ecee0d74bf7f4205108309ea56c4e5a4ac9d3d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/2ae83bdd0abceabde586.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json new file mode 100644 index 0000000000000000000000000000000000000000..0b39e09fdd981bd22b3a2220391784a1cc253c19 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/3ed3625ef80163d27a4c.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json new file mode 100644 index 0000000000000000000000000000000000000000..67faa73f3256763a9e6d2147fd2c2b98966f7c42 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/phi3/yujiepan/phi-4-tiny-random/78bb146dc5773156a959.json @@ -0,0 +1,52 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 1, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "replicated-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev1", + "output_all_logits": false, + "sequence_length": 100, + "tp_degree": 2 + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json new file mode 100644 index 0000000000000000000000000000000000000000..c556c037122dfbad48571b48299a6826f1b2fabf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/6f449a39c06210b4b51a.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json new file mode 100644 index 0000000000000000000000000000000000000000..860b69d18a402bcfa80933283e8fc5a4a66395d0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/91f06166632f7d2d7771.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json new file mode 100644 index 0000000000000000000000000000000000000000..93be311cacd8ddda195f8cec66372b7830815ef7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/9a804e057317591235d2.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json new file mode 100644 index 0000000000000000000000000000000000000000..aace1a40cdc895d5610a7fa6a9c9f41528481741 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/Qwen/Qwen2.5-0.5B/c65c50ec2ec44d68f235.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 1, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 1, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json new file mode 100644 index 0000000000000000000000000000000000000000..625d08dca70ac1b81a75b7fef2f459399a6f6532 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/0f369de663b01a949497.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json new file mode 100644 index 0000000000000000000000000000000000000000..45276a6595cd3b61f50b7c8e45f6b662140c0c4d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/23dbff0523662bd7d6be.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json new file mode 100644 index 0000000000000000000000000000000000000000..f8e969e38469674a00f91fb87a1df44c82526340 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen2/yujiepan/qwen2.5-128k-tiny-random/d8449f47ba76c9710cb1.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json new file mode 100644 index 0000000000000000000000000000000000000000..a356113a6d590db9eb7348d7d25fb79685e2574f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev1/qwen3/Qwen/Qwen3-1.7B/baf33bdd4a8de9a04620.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-1.7B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-1.7B", + "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev1", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json new file mode 100644 index 0000000000000000000000000000000000000000..3691c901541f069077de436fafb30a523f5cfab6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/3722d0e82203fbbe93fe.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a1b611044c6cb7a27610aaddfa11009c0e9f21 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/a8be13be525f2d91669b.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json new file mode 100644 index 0000000000000000000000000000000000000000..9d5353428471822248dcf59e562f129da521aad1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/ca40c0099b06c7de4aa6.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attention_multiplier": 1.0, + "embedding_multiplier": 1.0, + "hidden_act": "silu", + "hidden_size": 32, + "initializer_range": 0.02, + "intermediate_size": 64, + "logits_scaling": 1.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM", + "checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "residual_multiplier": 1.0, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 49152 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json new file mode 100644 index 0000000000000000000000000000000000000000..60cd8556db54e5916e2e1da94b78d36d021cb9ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/granite/ibm-granite/granite-3.1-2b-instruct/e3ae33ec4036373b3782.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.3.0.dev2", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json new file mode 100644 index 0000000000000000000000000000000000000000..0aec4766869e27a9d7e325c11ded5bc4719f15d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/288dd60e3240f860ed00.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e33a0b528098aa1de20a5c6ef47f68185ff6f6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/2ff87cc8e903ea3484ac.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json new file mode 100644 index 0000000000000000000000000000000000000000..2ed4bbbe004aabc578e6aacbfb90b8894fd35683 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/bcefb76a05ead11c9fcf.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json new file mode 100644 index 0000000000000000000000000000000000000000..c35b5953e1121a9eafda62ec6862e5912bab8856 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/llamafactory/tiny-random-Llama-3/ec2b5e8bc22f267c16fe.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json new file mode 100644 index 0000000000000000000000000000000000000000..335268f83626ee7106717c2c28ff205f0fce267f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/078092168933c6413d2a.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json new file mode 100644 index 0000000000000000000000000000000000000000..89a2676c044d205cefe2c26a8d6334b584d46137 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/0a8784a00d0c8111b947.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json new file mode 100644 index 0000000000000000000000000000000000000000..b0418adc9d0cfdcf27d1f3d189d90c1743f9a2b2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/llama/unsloth/Llama-3.2-1B-Instruct/3413a608b29245feb044.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json new file mode 100644 index 0000000000000000000000000000000000000000..83d9d035bbbd882bafa5a83c36fa382d406a5649 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/6a2a704cfc87e507ca13.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json new file mode 100644 index 0000000000000000000000000000000000000000..5bcd9cc3ba289b63a355d97b2902e5946e2c547e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a00465bf47387193d57.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json new file mode 100644 index 0000000000000000000000000000000000000000..94280ab241953e8ee41eea5696b2a323dfe60111 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/mixtral/dacorvo/Mixtral-tiny/8a9199743c35e18e3bd0.json @@ -0,0 +1,73 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "dacorvo/Mixtral-tiny", + "_task": "text-generation", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "head_dim": 32, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3584, + "max_position_embeddings": 1024, + "model_type": "mixtral", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "dacorvo/Mixtral-tiny", + "checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": 4096, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32000 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json new file mode 100644 index 0000000000000000000000000000000000000000..8295f5730c425f6792402975af5efaaaed0fa6ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/1a5b82eb620bbc773cea.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json new file mode 100644 index 0000000000000000000000000000000000000000..58fbc3eac28b34130a54e22178e39d4bcc7698fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/8961aa887fe7e291ece4.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json new file mode 100644 index 0000000000000000000000000000000000000000..40e356156195f024a84c928d5a71338c7efb695d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/phi3/yujiepan/phi-4-tiny-random/9ea48b55a0a83cfc7c31.json @@ -0,0 +1,74 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/phi-4-tiny-random", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": {}, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 32, + "max_position_embeddings": 16384, + "model_type": "phi3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/phi-4-tiny-random", + "checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 2, + "num_hidden_layers": 2, + "num_key_value_heads": 1, + "original_max_position_embeddings": 16384, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 250000, + "sliding_window": null, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 100352 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json new file mode 100644 index 0000000000000000000000000000000000000000..35f7f17908fbf61e1a6901a743e7d029e50e7b86 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/184ac9147c5c3a01108b.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 128, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 128, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 128, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json new file mode 100644 index 0000000000000000000000000000000000000000..c1e066ea554df889d980f2fe8f8327b8de5caf03 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/Qwen/Qwen2.5-0.5B/b274da74f7d452704179.json @@ -0,0 +1,71 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json new file mode 100644 index 0000000000000000000000000000000000000000..f974a74db59886b4f746e5025a150a95bcd1d1d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/50d867d7bf6414aa7f5c.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json new file mode 100644 index 0000000000000000000000000000000000000000..c3e410477517b4dfc804c32715b0a88df964dcc1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/6762cbc52990269daa58.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json new file mode 100644 index 0000000000000000000000000000000000000000..71da86815cb70f248396abeea8d175652b2dfaa4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen2/yujiepan/qwen2.5-128k-tiny-random/899cd61a155b97ddd046.json @@ -0,0 +1,75 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "yujiepan/qwen2.5-128k-tiny-random", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 8, + "initializer_range": 0.02, + "intermediate_size": 16, + "max_position_embeddings": 32768, + "max_window_layers": 1, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random", + "checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": { + "factor": 4.0, + "original_max_position_embeddings": 32768, + "rope_type": "yarn", + "type": "yarn" + }, + "rope_theta": 1000000.0, + "sliding_window": 131072, + "tie_word_embeddings": false, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json new file mode 100644 index 0000000000000000000000000000000000000000..74af2193065020be3ee3b365d1441205db58fda2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.3.0.dev2/qwen3/Qwen/Qwen3-1.7B/079acd550201c732fce8.json @@ -0,0 +1,72 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen3-1.7B", + "_task": "text-generation", + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 6144, + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "Qwen/Qwen3-1.7B", + "checkpoint_revision": "0060bc56d46589041c1048efd1a397421b1142b5", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.3.0.dev2", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..72cd4b445ebea14ec63c3bd52a3bf74935a45d17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d37a05e9076514bd247ba643d2dfb34bdca5f1924f390835ca6431fd5d60cf9 +size 80261 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3ce3ac6393e06ea802e280cf8cf6cc6b1cdd9ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_014569533dcbafbc3ea9+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5670fe5126454249b42797df432aedae725b5486cb2d590368215780fceef68 +size 205824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..25943c86ad01d378af231e8be742f4c651a02ddf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be5b5a49dcc4c9758c543d4f2cfad320811b45edbd7caeb4ed84223bc9e9ee7 +size 149919 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ee9609eb4fcb9af9fcb6e4bb9e6e6ba4b42c5827 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_01f5b55fd0ce0ced4fe2+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f7a3fea94c28696d619f82622b9504234c6b3af7d50102628d12b0952d9484 +size 594944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f0e9aaa862adcd81237840cf8f54c82bd1327e3b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892c32ed92799e95d2ab40d5a34738b04d2f7143e1f4624f0929e59758e23c64 +size 535749 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d76ae94b4a4a396fe00b113ab1a65e61056cb5cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0266cb26c6deb0adcd96+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81343b6030b10b8e0afc06e398050dec11ff840e24955618dc85d1e7fbc5d78c +size 1772544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..74f086f9a77ea3dbff4417e7e76d349b09a45f06 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1379bf9d9198be3627ce2a98a8bc77592586561dba15736ada02f1a62b03546 +size 46622 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e4c6c725bf95acdede558533a717975290a90ed8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_02f045f6902463c49bce+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf046ebf5a86a78e87113f066962dd06a177293863e91ecea5b65641c65b57d +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbebb9f453a1bd75c70c8c155948cc8e02e5537e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c11c1403ca22d657c1075711aa73aa5591516ea0af30bb5b9d996686c28986c8 +size 882680 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f62a3d2b7dcb62de72c6dd616b3eb3efb582683f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_036bcd15be933a0d28e7+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98686e443277c12a8b58b7c24fa3c41d30c6d1dfd8f93dcf4fbad61bfac54f65 +size 16292864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..89fb2b90befb41896556c5c9f89b02353f2144e2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dca9281588d6a3cf8ee5b74d30f801989295e891600352c26bcc2227c6605920 +size 69007 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0638d28445a202c7d1dd7b2be1a65fda5f1ea321 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_05ffd8d270e309fd1907+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c925d197d7421f2455f4e15588b3587c5635c019ab6193402275b59fe07ac013 +size 1219584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6240d7fad258db83a139b204f752988d8fae8c35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da48877712718e281765474b71b2b7197a4394e8f1c2aa326df846e21f1d6ac +size 563496 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3091c3924c89f79d73ff1496202abf570c2a1a3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_061fdc842caa70a3fe1f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4a8f9b622920e767a57582e10bc9ec2f1aaa1fa3e330093ba80d09a509cf8b +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b61de38637a4d9f9d698233b40608df201dbb962 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29aaad2d55b23d820dfe7a867a3081ca69ec7efd6acae2ff16a83c9285e258d +size 46338 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c133b3ebc9102f80bd7d9cf6dd5c996bb62a8c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0ae7505139b34db6da76+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f5ea3a262b6d33977dc72f997dce8d228cb0504ba8dfdb3e545789d974d05a2 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f86c6d883b197fa2f2af4bc8ea4e89e342564b74 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1d6b91a43d0f9f90c646826c509fe753b4689880129a8155140db6a1621186 +size 46541 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87e225531acdac05008b4336cb15e99cb3529c40 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4868ab12ff273c74e0a4eb1a1631f62a3ff9692ce6df293df4a6d0d808573392 +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e23051223c3e3d5a42bd38fa7004dc088a2fbe8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0bbe60dde8eaacbc8218+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8905529633425558763cbf12d6c6817d71ab06370472e5e67169aa7a59119f8 +size 152045 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d082a2234e7eeaebd09433e05b15ab90d4839531 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0991e707bfd4b423cddb443d20b4f5aec4b1c262e379e65b64c27b13445ce083 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3b274d0ef56557a38baccd2d144b3cf5d45f6cd Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0f88705903403514996d+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6d19bba5f796281cf6bdab96649c48970e6bcf1b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09596247b06e97404db0e3f45a6e58f5ebefe9afc54db0909b7232a603298766 +size 53803 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..98a31e148d347c0e24bb57624effdc703a94976a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1215feca19e3858f9ef6+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a688781c302c2ccfe6ce93506b7149064bbc731423ba74ef2621e257d3158b7 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..746c90a483fbfad7ec871a21d231753a43f5b193 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d1289a2c559df7c88f73d3c8226a619c96ef2625ca170e14352c3afcbd373f7 +size 163488 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..15de543f506f1b67308a9558699494fe6abf5d49 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_12343f539f64e8427d30+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a7a8b0bd716df3ff6b7191cdd5a90a1ffc2ab74577b2f2cfd80d003a8a98c2 +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d70aa60f81f148020768aeaf290bc8ee0707a21b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe405f977ba3d02427fbc57c6e26c42d54a7107829fb44a5b2c93cd232639bc +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1dd04df56e99633178d7e0cb98aeecfec2a43583 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea7b337842ad8f8189070452b9ab593aeb9b7e4f0f94420722650c1c8eb2853 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..80f8860c574aa37721bdd96e25501ee8c5d5bbdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1250cbb754ea34b4df39+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d070b3828aa5b82e01a08ab35b4ad3fc4fe55e3b2a7038d36b8b071f5c43f0b +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5176f3ee876fe791e98a260f91d1ed1a55360076 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9337504de5e6bf6ee8c41d65ccbb10279e0b02d975b02eafe497da5bd7356119 +size 79977 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cd9dca7a940ac833beb1f2fa8c9d1eeaab71b071 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_13aef406efd76bc89e83+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9ee181afb449e46b134d2f0a06f66268d86f801e16ef4bf6386fe48b7e99e2 +size 205824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46f295524e7a9f8b3fb6dcb73ce7f3535a237194 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ecaba55fb149b84609a0fcda804b09a672d6f769b4dbba251cc65c06fb1ffa +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..23a853315ef8a152168971e0614a77f904a50c1e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9635c307d1d4f32af90134a1f3c72119e6a300e86eb5828f374c9092ad311bf +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ef1bce54d951dce3ca686e792c6713a8c2a2d6dc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14073ca9fb30b9c830ff+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df55c6ef9103600cb43b5d4160809987f5681dc20c9a91d0e4b5b1c9c56442de +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c85de92a248a02d84c467a85557369c81279837a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a43f8bc843e3648d0edd9e5ce2e5d6bff4eefa4bfd320c09064611eefe4799 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d713af28496e1cdaa9983c85aeb46bae0830100 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d390dafe8fbb260d63d5ec2050bd5e1599ad0abde1ed9b95f82b8e3761a8930 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..775f337fc4c70b9be4139a6c12c478419439e45b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_14e06fee0b82efd1d1ac+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea4e42db654ecf8f6c4bb716bfc8c97aab96ad2977ccbe3d40bb8a498734859 +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..098ce4a2a149182948cfcb3c3c054a380ccb8ca8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63751a0bb5e7aa9b0373ea81eb9ba1650b5eb601b4169dd868046c083c38c781 +size 971593 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9866f2ba36d1b366e5b587bc59c21831197c0256 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d945a6bef3d712dbcb309c056d799eaefa88d0b05342e5420d0f6d014cbc82e +size 3369984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d6b00f34f8d0437751a92aa607cd661068dbca47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_154b94d9a246be73fae7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2019af15f575af164d7ff719706b72c086c5cadae8453404f2cbd0a16472fcec +size 3543112 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4e960534685e074413aa15a61c5a996943f74e06 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f076e176f226686a5ad4f3d489a0bf778feca70606a40c1f0f4a111d1b867c8e +size 2581731 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f154de5b455ee5885d585217d8da36bb919883c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d9a195a1a6c2ff84e1c34d3a9d2ccd05bf1a98d65884c97c5bcf6a1824c6fc +size 4905984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1582a1df5afddf5f693726335da6e4bf2f0473bf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_17d13efb22a842d128de+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b24a2705bfd13773ac1383ef4a68fd9a310e749472d1fac28e4d82dd936db79 +size 5079029 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..411594502dd598133d11633e47a660f41b4bc2fa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd240cc4a05841d65b1048379c1685964791ed5c532c39eb202a9d41acc13f4 +size 131996 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aaea93e29b4943609575fea5ce271902126f583f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1bce819dd943e86ebacb+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72076a88eff79f57d987b4f958b700cbb7b4641b721f223fbca0c7d34424183 +size 1281024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d3e9af545b95ddb8246df46b8da6ee7d7937c696 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194ca1afc0b2393a712fa0d607d4cc450e98a3c006647e3cf659b26887e350dd +size 567050 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..48760aa82865a42fcb27c7f5ee40797530c9bab3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1cadac86f33fc48d4ed3+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a7cd325b039324e57ba4ee9b9d00e4958c5454011569307705d01b1da9545d +size 1147904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..835287151af0189e456c43eabea0e9b2f8710236 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e79cc303169501720491636df85a8ef222e195d022e9b0af0c8c9a23aa555c2 +size 81609 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6263fdba458cb3ad8c2d44dd2adb197d76b3d66b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1e78bd200a100a9daaf0+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f58954fc76cbb93a6552b432728a6fd38d3787465f175ec0145193a4a866dc +size 359424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4fbcc1c99efd0e019080212514afa1e659d45548 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af141684fa8d6e3a930f1253b5861b2ab6d6ca64c63c4dc64c906567e713b25 +size 79977 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2d4a704559a63fd5d9be3dae771071c329edd210 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1ed7027480f23aac5a36+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06139007efdc8ef6bafb0c7d75ab3c662473ad3c4ab19369daee603cfd248257 +size 205824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..818cfe200b33b363185427f171177827d0934540 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a8ff6161bee73b0a6b11149b8d832700ec54fe6a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33fdc7be286618543839c1656a87744f5365657343413672bd10a29d7530cac8 +size 1032672 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26e4ef49989f66f0b404ea7c0a2744ba2acdfeb8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1fc614255e6a9d7b109d+841d78e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843df4540a967409c8538056292b7e9a1245f6f8929c2ac0138d40e35fcea505 +size 2704384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5529a2d64b6b01f284f84fe8ea0b7c335a921feb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcb4e8851b0c4215eb7a6f9a0a852caae2c097338bdde91b4da140824e622f0 +size 2028263 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4998815a10f0a89828cfb97ca23b1fd32204af33 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_20a8ad605d63a097ec04+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb2d9f0caeb843f03b35c29a5fc097ee21e5a947ad65f7c12df468e5e445602 +size 25211904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c37a21b65677e0f7d7158cce7b9b2d31b2cfb2f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aee0c66ffe758681a87713cc171dfcf19e8dfc176924f00d6f2f3e5cc241652 +size 540237 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a768d362e2d9d5d3e3c9352a67ef283a89de3fa3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffa95db94ec779a570456d332c8276ed1329d37ce79e3ba713b23a368e996a4 +size 543744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..aee19dd50f6357631739c67209323538e48a1754 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2295a69ee277ef5b31cd+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4eb121bc2d59ba1b4fa909741e79b348b26812a9fe1c1e0409574d939d7d78 +size 677911 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9678d5d13094c7cbfe1ed3593336fc92a3d0ba68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b3c4817ffe70e83920707fc0d6ffc7239e7d0c177dde1e8471140aa5e06d71 +size 163488 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..223b7f947af4321a8552641f3378050af286aa60 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2305ba70fd0a2832ceca+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3329c1312200c8e16a2c3db39263f73ea972faf42bf67e01cae053bc5e3beca +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..abcf462ee174e72df2b6d4b8348f9d25f6cab010 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156d8cdb7bc2cf2e38ea4ec1b9df003a6781eed356ee4beb4fc7765f18996bd2 +size 53519 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c9316473f7f2ce8748bc0255c4487d51c2386f0e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_23f5615c0d551c1e6267+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ca181b2223b6ee5b10a11ed5da089262d192f6e1d60055a06ae54ae54ca4ea2 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..44d2d60463b3c3959fb60fb18e7ad54455b73253 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2df65817e2703ab6983c175431e26c37a0ab5ee333592a6575d221e97a643fc5 +size 81115 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..25db6c990a7294a1c4809845638da06b155ea7c4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_26b84de33e6524d9507a+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629254561002f6a231db1c9da209fc813092957c8f2d614063721685a2bb1b47 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fab2df07043d16d8e4f8be2fd6ab0e9f465ddd16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd95ce79e3100f0f971325b4e1a0b5d080fb4443153405b5869b9cb4c8870c91 +size 378945 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8b54245249a63b9679f08fecbb863def34d98f5f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_28996c645289206ff473+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe54150ac740c2430e66289586ee7d6049c299eeb3e0d2db71b5ffe42c844a8a +size 564224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1feec1f30330ec06c78b65172cff216f0e6867f6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77174854b9cfca7d0a00ea7bdcb8a76168e601e4863a800cd93e5999c4a72f9 +size 970248 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..56d2efb34bc5f5c9b201eefa8630c452ddae81c2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de8383c29f051862f83755fd1c27b37ed7198c4a3ad2fb60b7c3df85d25c852 +size 2161664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6ebcc397fdd77f7b3bbf49a0fed5b4146ae00ad9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_29f24c5cd251b3858465+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fc610fa821208677e8e0be9e43ce9d87f6be52ed369788828c5ca87b9f9c38 +size 2231293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3896babd2c7b18e58568f42dd2d98813dfa378a4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b003122b37563ff19e4c2d28bc52b81a23c0510a6e7dd43292f0f777bed842b +size 1887935 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..81c05a102555a487c8aeb18ffddc37f7bcf16ca5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c2bde636c6bfd7ff3d2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6aeaf87f5edff5233503800ffd18f97c872b18ca3ff2ad3ecc68f0305e905c +size 25160704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ac17e9f6652613272c35300a4932d559cad63a09 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba3cd4ac3324f4b36c4953f5b1acb31a61a6566e91a96aac24cce071aa02e49 +size 120433 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ed1e80374c94743d6e5f4a157d5f6519f27b5d94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f4d837a54ef7c757455e6a04e635189834f0692b26cd6f5fae305a1acfee41 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..72d4bc7d310a5b63dbcbc92a9b8063ac76aabb35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2c3058cc319329755e86+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d21ec9075554f54b62b487e36f84d09e3bd0b0b5ff9b5feadbe62ec18f259299 +size 193010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1a548604c4387c1b2757443d1f7ef9633e1a437d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f3f22f5a5aed80511ff092bb6c5efd7cee45e557c3b5bd10bdc47754f159d3b +size 540237 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d7dd05c8c24936f685af8137b1e30b9603684458 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d970a56f621f6c119ee6f8fdebf4107a3c8d50fc760b157ce0f1a87412d41a78 +size 553984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ef99ee290057db16812cf266e1c9ef4d58f3741d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2d5d17bc5aed8b62d1bc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291b718353dffab0649ec844f053b3402735e48e4fff5c9bef9275c03effb4fe +size 688151 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..619a65842b2ffc48ba20f589dcc8bc9879805968 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b022dbe3118ee82874ce21f77ed663ca595c5a2581b72ea6e2e9566efd79f748 +size 170200 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae13e0afdecee38bf24591968f79a3f4923336e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aac636da500fe56a6f42934c4d48cf4f4c82c5baafa2364d29b7ee1439ead79 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..612cabdf1f243c6e3af44181cb3cee24c6e28403 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2e1c23833792efa0f9c8+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3acaf672bce0cf0f241b88be8135f0bca7b2cb0662e214d3480cd833690b70 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7b0878321a326d9651bc711177139b2a61bd19e2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:004b9b85c6107fa2c5fa61968fb6480630b9675f8c16dd035cb22224ae5e3334 +size 42250 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f5babce32d19e552cb665a370126c4c31ac950d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b9b1d05f5b7ba9ea8e949c79029fc93bceb57a5bce6f6406342526f04b04d02 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0465c7bcc35d7e06eb47e6b93939579c1625d81e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_30c8e5dffb371f5a2fc0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ac05beb6194393c1ed9395f1597491351292740aa8c66f0d8178bdd0765f85d +size 172461 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..34ea2b7be93b8e31055d0fd5bfce8bab64882789 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bacdeeef300c5c2d0e96727f7fa21475aa2b6423e02bbdeb9a14676b19ec8195 +size 143137 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b06fda2664af8c4bc693274ad12a9245323cd786 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d8b80a8c94b7dbe95af2e17930caf5f111fde2354654744ee0802bc22c9988 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6805760afc02a2cd5497a72df5aef4bcf05d973a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3192063576450b7b66df+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4988382c18c5913bd1d53fa95ab5491cce3fead13881bc3fb5e82bc186578f +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..732a9d29e72fdd5e77d349bdd738d22afd57b810 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea2f4fdd2dc3f40fa53b583672fd85c31699c42c5cbfd33752b42f91bbd3b3e5 +size 1627840 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c32b15b04dc488e56cb9bf5dbb51d03157585f0b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b4beff862687dfddad2ee3158199866eeee4ab5a768e6e70be5976b38b6ef3 +size 8571904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8b3d3cadd09ef7e521205349f32e5155b3a5d301 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_32582ba4c0f78135ecd9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8e5029c7adc21a9b08f58f731aaa798e8d0c60ad7d9ac423c8fd932f200bd5 +size 8709155 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b2dfc9e84d051f93581a365c3a69a8a8bcb2a369 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a707c38f55510f08cc4791c50f8cbd9ad09725f95e8452a5ba2b64c6bcb1f67 +size 112633 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0e39296f988ee211d9d8290fd1467a335cc9f4d0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_357d74bcc250c7615bf7+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd924bca325e89e065fdc0bdf20698ef7921237c82218668160464b433b3fe3 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d390d6162c244b31fe5f9a0922a8c3b7a41d17d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7dd29a073f3fa0392d95c2627579d6ad44d7a5df8449a558245ff6fba8761a +size 694657 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ef5031a672b1673f35ccdcba19fcb5ff4b721b1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da9051456594b2157cf3f4b4f6f03942ae53455e63b1052e964e371ac229924d +size 3533824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b07c203f77e5e7aadb15b452ff16eaac0e4d05c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3596a0566dd0e4bfadf9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b8eb69752ecbe66175f39164781b74cd2ca14963551884beef69399aa01b6b +size 3663806 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a0997fc6dea40f63e76f38a65e9305f420d6b14 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c995d029f5e6638ba2c5b3ba53974db125901dd91d08b036492ec788838a860 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2280c026ff31c3c721352f413b97650d2c6f440d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699c332e9b4e3a30d3c13f7d463f314e86432089b6081068f6c1bb903332c2c0 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d70db4cee13e5afa91e2543518a0ab643e4fcafd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_37bbedb3065730ed804d+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dee360f558901db6900c8d561db2945dcbef7feb990479f630016494f80fc2d +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de3bf9f18b5ea927bd2e28fd6091f2ebfae179f8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29860ef74850d2f987f6bf2ef0b807dea2b0f7b1325e879b5c8ccf102522cf7 +size 172375 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..234e74fb643dc8bd74db8273e1032732ca5bdfba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_389d56c4ece62e7f27bf+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbe7f251e8d317c8c4ada3ca1cacaf4840ead4adc50f28f8dbb59e129735b8f +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c3ad759ffe280adf83f10a76fa5d9316e02eaffe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e6c084f20b2d486c6b89853774630543769d5a08209ccbef74cb26d6f91ec0 +size 684127 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e94cf40ca1d9fa3b73250acfd741d3de656b5ac9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_39acf6f0bda0ded27c43+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc3f50b81cf52bb14156c7a68e925b374160fda2e0f7b3204ce1aa556de183d +size 19487744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b3d44cb31f591666ecab33d6df40cf35928859e9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9aedabec1b9544b48b353e2a649d4029e73996ae0381582b2035ad33d17024 +size 833960 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..de4a40c5e46fea76ae401acffc49d6849dea8c07 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3b4391082ea94bb985dd+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799d6c2e202f01a8cf15fc7e0e795d1ce5618d204549943c0daa79586b1ac09a +size 32103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad29d3a5941cfb376b35d4a5183e402c138fb699 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7ea35f87ba50a601a1ae0efe18ff3e43303263bfbfad87867d741509d3dec6 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0ec751be8c9c39f707a897e4ebe1f2379621071e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3c6243e358220353ba94+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b237fb6ce83ee0c2fc85cd3ee4e6fdf0e6c95f731520b402e0721c63facc39 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d53c0401a29441a9341db9ed8e0ec9d409efa0e9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a82326b1df270e0e2fbc4a80381418ba02230d8fdc88844e08e0cda0db8f23c +size 1474893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67938bef5ad3b2c69c38e32e7a7942679f415d88 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:246e7fbdcee16934a3d8b7e9d9267ac95140dab4c3f583fcaf3142095e0c03a0 +size 1475584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ae174d93d4802d1c3e08acfe362f9c6beb55715b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3da0cc0b0cb73796573e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:658beadac2a5c00d5feb6f0cd3be153c63b27426e19430c2530ad276a4cbe653 +size 1620495 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8fc6b368ba6faa0be28fa7e38133e85747865a8d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3fd5107b3bbf73dca906+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da71cd8c2d73b196d80edfd3a059991fe59320506d544bab5f7c789c0ce46b3 +size 2550812 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eb7cac970b8a89127f14f94aa95f7772256988f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea173b3def731778aae171bcbbb909ddc1467d1eb8304f5dde6f6ef914e45ec0 +size 87577 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b753a127faa16ee6be99736cb6c2d1f1dfe5602a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_41612d58bf9ba6f268b0+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:747f2f2298b6ad897eedb7a9ae8b4184a1dad89956bfffa354da44231c8d1cd9 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bb0b120bff742c4ce614760a13603b3ae8c63d9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d61fbae86b10dd4e6cf69425f1150b9b764adb3a04f29eab1f0ee31dc18f1c1 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..610234eecae5adb5d685f47ccd06156122760593 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_44e4fd5eeb6561e287c8+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b788954a3ac03a763f2c1a6abe75cc69b038eb4838ab71fa49c38aceb156595 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..807ec0d3cdbde8179a51bbac7f8124feb76db4b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9699a4e8777c6749ca1b08e97ee2988e45d0c0564a56ebfbde8dbae695b870 +size 175002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b3f2756a3557a814dac45acd7593f4416dc2c5d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15f384e9e62bf76d9e2a2a0d17225b8ec3b0a10cf597959c0e79792442cd5e4d +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5fcb7dc7d3b27ef62bad829b3132d2c7275fe7f1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_462f82cae5a4b2e24531+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd0c20afae665a8c066e0018140480d3f50b1069a17f201ee7820bb2ac56c7c +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..619394612635f8be14b36ee82d229dc5bff4337f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbb8991ce4c08190c00f2f5df8080b34521b6b52fc50a720b11ab9f0d4965e4 +size 69044 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1d95f42494c2502affecb4920bb1b7dfc48e70b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46b9d2bfbdf1b2752484+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04609027b9bb96427b8e7010fdbb9040b869e374401486d32ac650a12e642f99 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fa5b4c47f0c9efb5e5eac3f3f580ad9b50eab2d7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5caed578255a4ac824b20657d446241c513d5961d789b7cc1e5b5fd8d856cb1d +size 132732 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c0e27720fdb25c601e75abfda95a7739d7c4a07d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47164c525d06157b6333+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f08b1c452783b4baf8f2b5f6defedc167246f4ef34870a88a1b6bb5611fd1d0e +size 1281024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b4a4e3bdf79b1b3a43dd42b5c82639d0f104800a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45a358d2864f1e64fdebdf422142db62e9f545f381aeabbd85b3a74942e38345 +size 613271 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d11e6c0c55912155c50f3eac49cedeb4465b7b1c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8efa0af5aff936df88b6f2d84dde3f657fd52047e3b3ea53c4ad6ab86397b46 +size 1209344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..50dd61b7b32261d2210b1fb44197b3b63478eafe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47e3db014921f702834c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c269c276103bbf1025162f907e7f4bb0d5ed44e1552be0b309e5d825b20f5d +size 1354370 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..faca943254b5b0c6d5b7f7d259921ca5ecd20bb9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c331526b8f81328de18d402f353bb1407e0e23fd156d22c02bfb034a6e206b +size 172368 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a902545e4e1c2afed7ef585789bede78e6dce365 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4a2733ad0c48499b3cb1+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c003282b2e3e4ae95ae4b5cd9d54e32380fc65dce9bfe986514fe59cef7835c3 +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2cb9abfb8e1ee1c3a50d91035b5819017ac4f072 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cce13c2c8d6e621050c4f6d2a392fb09e41dd7942671696a8fe6a6738dfe181 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8a131482b172f4d5bcdd500e7c733b18c4dab230 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_4c948ba1d275cea9b124+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..852b4343bbf535442bfd7968bd4e70da4a1b43f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc52c78a504427be98bec0efa83bc40a4177085a749ee7d7925ee0136db6d2ce +size 842826 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5aed410bde1ac5abfc5d79773b453d9fa00a074 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e28e03424738806f8ca+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7858cc532af479f3724179495879fea25ddc59a0a0c269f88056ab467020ac +size 6503424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..94d88881a8b837b98369c9ec0808183fd14f2db2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd89891855efbf46d77661bc94ffa5c299634698c6b824b61b5c8760a06785c +size 1214876 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..821f634f583a8beb92f22dcba0d0b3bcbdb717b2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_506394735e22714a6fbc+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:167deb75f8fa57846d3f92d9eaf136b80fe3a20586b0b464d036f126dd44389e +size 1168384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d32bbdd4f7a5360a24bf2aff036c20fb3df19ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:885f72461238da9773ec54f9077526c61272e9168da7e56bb3714173bbfd247f +size 189416 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f259c978846ea13f189f62a61e296f9b16bf398 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a57b155a9bc0950a11fd4958b21e8f04e2005f9e6fc5acfa44cb2839078dd59 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bb25794e8a3f66eee4d39a35701f5ba09bb1ecc2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5147f0f8001e6350b5d5+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07386a196931d31078b1e2df77fac94f7836e56fd8157ceba41755b17a782462 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6119318c336c8f78ac75bb168bbc986b1c5f59d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b711200dfd242da19e652bc20b035b0c3436c19ecf253b6a5f2d0f22febb349a +size 172324 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3cdf9ae90b0df8ca1fd3642d8760d66b72bae73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5233b5f6cf574796f38a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d97120bee25e3b2ff4a812692964b8c62248afb0d20652b898baa6856e8966f +size 2397184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a3151e16035bb377efcc98c9e86327e3df98403e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa08a2197418946b128018941e30011676c3fcf15e6e0e9d001191e50975203 +size 217277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c5ef86b5a6cfa94045cb6e63aea5032a3b32cbf2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_53400f8937ebf3494e5f+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818181934eb62f57d81f84e9e61ec5bd71204c948e995f09ffd21d6bf80afdb7 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..539266b9843547c38764e29fef288c878baa0631 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a291625187a1f7883041ff8f28a2f6c2fbaf81cb2ac415cff81ef8302f22c530 +size 1534341 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80fb54dab72b6cdceb101b4548f3f0a601c5ab0e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_537ba05ff4cad22bf765+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea59e1967c4a21d13fb1450c5832c29a6186ce61bf6a02a5ec97db0999a6fdd +size 6226944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d78b0c6a1a09cde2b62d4bdbfdd4382574fe28d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399ea0cecb79d737af165310850865468334bd30b7c44a83a8d0a6fc47ce961a +size 149919 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ea564e18e7ed131d41e78ab5e6fe826b9e434408 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5459a61db398caa4e50a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6526c6a9f9f6c2f66e506334fccf76bf20848811464579f3af1de57a04c8e4e6 +size 594944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dacb08764b602024ec2b053f99c1271d8694c8ad --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519727684ec5f38496af8d18e5a993a235e95305440a05037262064f5610ed5c +size 2273224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d24168ff0f3a66e6b21fcfee33b833790390ab10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57517a23a64ac02bfe43+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160f89aae54dc286c1f7bc881d5aa22f7a18244839f3f502c84f6fcc34dfa21b +size 6411264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce2a3bf3336f6bb70e419807e84fec6a110ed816 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b2ed73a544b049277a3e081d48cd74f24a067c1c0d8df51acf59a9db985d1e +size 2400727 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4f4be35ee915d4e394e25914879a78be7501cb45 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85dd8e9800800942b8f4f44efc280befa8d4d41e1d9a489a9440de4d6f0e3768 +size 4885504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a11f06f5eadbe7ba1ca0165dd4e2966757d866df --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_575f797c9c6fc13486af+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8781e8141bf01e24b6e3d3eb546d7fe388d234ecfd5bca446268e7a007ed7d +size 5058549 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9211fba2aba3128de46f1f5f973d6d901eb8f456 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401233f1288017ae6d1a5139e126e22161c3152a6145c923b5b78040dd3f8c83 +size 171444 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9b41ed5096109bd095ef6d5bb110406ff6dfaf16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_589baa5ead70f9ec464a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2eae1d3728ce2c5fe18d5df1ddec5217ef5ad2b4a0973aea8e3e9707bcc9507 +size 7353344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f49fcfad1163e734c145c2ceb69ec19733b7554 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f345bcdf3b91a6a4e1a747855943ee57d1861dd9a3c0cca95b9bce913fc3153 +size 120433 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1845c1c3ff59da967f1dd202d350f10bd763e133 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa327af0bd70b02e867e3e87943a23e672dc704f9c52a2343068ad6388a712c3 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..838fe83162d53620ed2315ec202266038449d721 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58ceff19c65e10158e3f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb2d414a87a831fc5a1f7f85fb9dff8a818c28e77bad76b248cdf808e3720cd8 +size 193010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..029f0e93c4340fe229b051c5b91f7e238d5cfaec --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fc852967a2ebbac25ed57529b2948f81f6bdaafb51c59e38ab4d9fd5647e796 +size 161606 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3b3a8ea1db87c2c39477faeba3296794d5ba968 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5995189e21fa80e009d5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acc8a1920bd9b425d09dc0c8ceaef87904da7deb1f527be2ec0537f4e05e596c +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4ec6bbca2edf6e0fe9b91b0191e44c7e399d8c98 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:139f7b04dda15aec79d28e9bf545c6e1eeece9fb0c01daa83243cf65ba4b6c0e +size 51946 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d3899b5557af9a01a3fa82ea76208b383111139d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b2b7e5fff429c100db385331042c2811d2d7549b39531f3bb1366ba816b73a +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f3685b81ae502564e0b30e3e64c24a73ebece278 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a10198534c5f2725fd7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dff4fc9a5a0665d40a52fee48039ca1e7f406e579c1a8537ee198d319618005 +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c99cbcacc2327486cbc3518d8d009679d2f104b1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5a423b91dc8b9a373b12+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33a1cd799da49294617b2445cab66236a026ac0e51133ae310607b88893c2e3 +size 945923 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..575d38254c088edac374d254a80350780620f1a0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b0b98c40c8af21949e37b444f5af460a1e3340ee18cf0fcc6d649bac4a7797d +size 904662 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..336af7a29741636b52985c635cb0a37631efbb6c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5dadd23af77b433b987a+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07dfa82b4cfb6180dc5520b83ff4cc71b82977ce5b8f9a9985696f707a2228a +size 36506624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..df69480f80b87175ee027b3999114ed9e7f23246 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e05631e9022e187a8b9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2d0772cf3bdd271e9fe396d7ac39333b80ba4a64d2cce91bb55ebbea5490919 +size 917457 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8fe4ec612f27170c25d41b3b0c6070a13afeab6e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603218278a42c788949131e26b851876358cc9ea52800c5c25e2ac44f5c507c3 +size 2210522 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..95e6528a0bab29467db07f5a4f8d47a282e04360 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5e3af8e039ed38dc1451+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495da33df4a2e3e0917e8956f17b6483ae15866e56a37dfac502145856c9eef4 +size 6319104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b05700abe163df17e1594d5e6060449eb6ff5dd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5ee958efa9d808fa7242+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd1fcf86d544f20a6b87ccb6a53143af5126e05c2711a0b999a3e53a94fba9c5 +size 966389 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b1cd5a4dc8f28797098bd2713741098a6f573546 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1526da0d028a11f93e160ca3eecf06a77040487a673d909c3fcf534944003fbf +size 218822 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3f212692c48d27d9cc013628f024c51cd72ee6aa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5fa284430c100416769a+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30fe64b5ea2bac7beda5641507cb6b75675ec910a2bd9fd0ae91ed7df07520db +size 431104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c711f4b934c96fba89b969203e7ea147a42de1e1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:281e25e094b622c731b685f7968393993ea0e54c8f0becc759d3b1e7f5a88717 +size 780142 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89f857a63e2d3560e29eea5dcf91b77430289548 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d560864f727a3c6b2c1fc3b77bbbf23287074487f7b02f1f0f0b92d1993a62f +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d3d7640250595531d3c6c934d4ebb503de820986 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_616ec6966b1c927020d5+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e511669141d590982dcd4452a375cd3525dba928cdf3bd17eed8a57cfeec82a +size 6019850 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ec94b17abac5f40c669dd990e1d18231862b85bc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cecb8b2e6d9a2ea5d80a7f910d95611058e25448a5765cc2bdf5e2d5a6465519 +size 7092 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6a5bd59dbc0f17129db225dc56419682e35d3fce Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_61794b8717d8b5a8853d+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..38d63d0af5bd104db901ccf5682c22f6e62c53f1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cca1dca931c22a1141ea848a43f53e1bdb1eb2b3ef2265a8286775fde065bac +size 189414 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a43264ba12bf9522562d46a0b75603b0f7e93e1c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6269f6296ba4e8aeacd3baaf25290b7fdc2a995cdb785b4b8216483a6dad0c +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b8bf7083ee8e5fe2222b281021a9ed0635470ab1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_619d574d0a0470964273+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed1644dfca658675ba1161dd39310cf790869a2f7e45cabefea2c99f4875c31 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1fb56e7082598b8366ecc1e26e3508fca8b84f7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ac7b363ad14391ea6e90f95eff1d88a5146cc28af5699413964e7fc4febe0b2 +size 55929 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..26c75f37364b760d300715130cda08963aed7d99 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e85e7c33a12ba1aded0285eb00c9631d44713a2e29c7e17268dcf5109dce34 +size 2366464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..f4e89d7da3b61b35b25092ea0c2d9f009c71afa8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_634439d56d5bb67e9812+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4acd67a4467ef3a0cb39e5f4ff6d7234573efbd930e665c2631b471c1a2ab93 +size 2376747 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e555d6704bc91fede87ce374ea97bd97bb06fad0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ec46e38cc84cd784f0920bd6358fffa01c7e67230c84573d0f3a7f89fa18554 +size 977926 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6df8cd04b22da726d04f5cd129e3d53b349a9ec8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e091ae75e1440a5da5acf08299d17a61ef35c0cb387c8b01a715274c3c72ead7 +size 7855104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d91bfa8e488e97672e29ee599396fabc1f665dbc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6557b82d7b377cd89bc7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3588611a69dd57f77d779be59e050cac0a8d4905dc22b70918e200c885df2d +size 8028152 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5e61681ff03d9dd6c61b17c68b443c4dab10193b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_673e51ac384fab9697ae+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdb65ede47484276cbb461fd2f18e9e7f30d802794b8eaff72aaae4a32f58e2 +size 945923 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9c52a0a7afa891dd38b6fc093c1c2a1663f11043 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b96feb0f745237e193de0694a7eca8dc685a11603924cf3b69ee7a465a8f50f +size 1495411 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0615b30dd59e52b599b61aeb65b535cb9b44922c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ab24e6f5a4812d92272a37662fbacd5a374c1722bc58905de5eeee45824529 +size 1475584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..62ce8cef5d6702ba5db12437a63a482a882b8c59 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_685829f3c21ea0a6ae49+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7244a43a256bfa15b634a7e5e0cb6be2d06c40320eb2174d9b1f22b4104440aa +size 1620495 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..923f63b2f5b7f20107631d517615974094181b1f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073bf71eb73762331f3ca510110f5f8ceb6bfbe1166f4daa3f2c165612223274 +size 161552 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f9b87e16c3d4715a99029fa2e3bfd73c682d605c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ae2ede62d472bc6d7f6+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348e530a961c0aa85db2e222fea3388a5a28fb020a3d4cf617d71195d7226d00 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0ad26984923816bb113a1d1b615201345ca1f8c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c0a877521d4ddb694a0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78092af6d0ac273d51ad6b0c0472a34906a89040183360a458412798c303322 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d914c63b7d9f51d75362e6ce39b4618a9bfa2ffa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e64f711d59616053676d4c52871e26b476790b67af3c61e4dfd0b718c26b1c1e +size 1055910 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2b14b1ad2f68126d829c7343ef8139f72610ba77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6c8ee4ae1f75b2b77a4b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94897bd4bd3e98c6ac72adb5a18d582200d033e86bc7ae23d99f8a26aab9a4c6 +size 4393984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bbc10999d6b768913b295fb7d50ff69b73b6139b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1987826c9843da28c0546864697f9250aec2e029886b4f9779f3ace1623ea1b3 +size 175084 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a37d4e7a40f2db8d09ee3c2b4ea74b426211839 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b6eaabf37ae9ca1d81252c8e1b47bd23c6366310ed6320212bc61af2540e490 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e5647301ecdeb407a68b300004a1d5cc5916d977 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6e790833dac8835aeef7+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01eaa59c69b0d14228345344b7029f2fb23c75936267182fc78767beaf4eabfd +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ffb9bcb09c0a047f2e7a5db3b81f64cdb022773a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:290a333734a6f32bd4a043390c8618db9935ec367258ea9d69bca56bf8680cf8 +size 2213870 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e0327f0afbbf7870421af3fd453338f40e63b63 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6eaa88a4e11b815b0091+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8511c68da5acc49a80573ad13fdb6a01a6e15513a526ad554ab3952df96518ee +size 6431744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.hlo_module.pb index 43163afb5a72384f57113ffa4393e78e522ae7ac..5f7c1d77ff486a52b8b5790e1a7c20c5fd258da4 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4f76afee43d9ab1c90e2861e613f9def3701cbb27a695115a188726fd38284b9 +oid sha256:b710eb3bd6f9bbeddfde6e63a203f82d9c6a2cd4f7a9979a8070496650e30d7c size 643836 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.neff index fae4507d2d0b82c4e47110fe9b46642809250a0d..1803a918678e060a061ad3c84735358f4566739d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:801605fa14a383139c360251c7a08bd5d158f79a40753600bdc0a66ab2a36e93 +oid sha256:7d3bfa569a65174bfa017177c83e29cf5919211aa512640890b51f0f296ac4af size 8551424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/wrapped_neff.hlo index 8c250b837c7421e8ff418f8aa5acb4bf9194e5ea..c61ee5b39b72566e3ed15ee361a426eb434a476d 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ec54d1cb60ff01fc2a1+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1eee74f9a28f931e230d4cec08de4730f4ce3d09c3101afba10b4c4745793293 +oid sha256:4c87f43a31fdf2e2b3188b3cdef41d6c017e4afcd0915b40ef4b5c722e9fc2a7 size 8688675 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d6f1724eb9ca6bbf34+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71d6f1724eb9ca6bbf34+431f5505/model.neff index 223042e7624e79ffb06211e10f72760d0927be01..4cf0cdc8a8757397478d63b7e9412fc53ef318a7 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_71d6f1724eb9ca6bbf34+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d6f1724eb9ca6bbf34+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c3791490780e64f6f24c2be525649341ca9472f93b09a314ce6441f66e96f372 +oid sha256:e1769f4ed8755d6b56ab33b3ba059b8f67b3f0dff5a098fc5ead75908c8dc238 size 3974144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a212bf590655fc59212e91375ac58d4ac3abf8a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:160a54b2690d5bb2eb17da1565a42a86d3b770ae6db9cfd6a30f9031f4e63c2b +size 131781 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c75e70b560f3a8364c0cdb4df9b3b6ed94ca3e20 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71d753ec4c8142531990+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e32c160596287903b26b5dbc6039064f3cd94156385b9306f2b3d39781bb867 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8f700803ddd88a984cb69571d5c90192c908208 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c3cba0e8b4abc9f24c20c096e91396a99ef159040de90ec9ddc5b1ae5408b6e +size 954337 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..16d6918e63281327e1668a7eaa2b8a531e04dca6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b983e299412657806b2376708453d42dd4b0ce005d3af0fa793997f1f6aa7b +size 1526784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2242d0e6f7b42756110fed26c6da3ab802c314fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72055a9b8fe0e4cf6282+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97fe7acb5eda6b5fcf5d68c7c29edb30e80dd6e75c08125164e81a1d309673f9 +size 1613754 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..51b825d0fa0fab60595db769d2ff737c11465013 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63fcbbb5e1bb855b003523e14014bfcb217347b29ddb60d9d54381864e136269 +size 411861 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c09e68190d03105322800b09221ebe9c2cb4a3cd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47c97d95129cdf7c42e82535cb88a6f1c6c486badfb88552fbcb3b898e7fb73 +size 1639424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9556375996b284e23d86409bed1009088565dcf5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_720ca15364a9dc388010+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b29b6d08b588f5ded26a2fb9618b69b61f7532f15cdaf43bf38f6335af2ca5e0 +size 1709133 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cee68abee60c0300cdc0f7363612aae53fb136c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3de47ddfe9c2dd2dd851b262ed8e72f81d7bd5088bd4c866e33eccb1235c731 +size 88686 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d714cc233ea87d3373db30b3e4342fb76028267e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670eb5f6e43393459448f206825777f42128ddb80a24d3859cee614b17f37054 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2d298b31a325d9db281b468fc86d636891ed5277 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72281753d7f57952bfe4+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79163920afe05ec7bb4285c4a6cc84559fbbf3b4fde58c15eccb5c8001b019c +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3173036d7027fb055ff3c3785cd8660e8b807265 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c256f0a3816ee65563b4989bd810188f6018639ca7204d5a69b1f774a42f755 +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c47a5f16e5afd26963785a9c0e50169d96f5f4d2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95cd85b860ecb2d73cd3e2f3fe2efd949dbde68b26917ff0a565f470bff87b56 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..fe22d2751a4bd033cde52b4206e8683ee30fd096 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_79528694e33b295d9797+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e767e8a7e4f2a5a55166066c6643b52b773ec28bed262fd8a56988add3cfc2 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..879520f19ff6a13d521b6d6a364af4d38a0d43de --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aad9b2b230b8bd0f6780dfae3d281a4911b9b1e19dde64baedcefaaf0093ce6 +size 1522160 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aeb3e612d8b353e0eb26e4086d43403b111db015 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a301946be6cceeea745+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab4b3234c3fe1c49da3d0477204b1c2ed449f5cfd7708effa75f8d1e4e8644bb +size 6226944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6cf59b185ef0600d8bead608268c2cace1a2f154 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97758a2b3a85873d3978bdcc4ad957312312aadec91e4e6ed596de7aa41731fc +size 47478 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a0b4cfaf9bedce6c08095241f5d5f72d2b4c6fc7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7af318ed51d57f96cca6+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7afc2cf7419cc0ec0088f52f3ca452408f18ce12d91004649ec710e00ed064 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5be2400b66b8d7e2b992659e83df2930b2a9e9cf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8215827868d03b945307eb003b3b70a2705bbc6c53e01edcb9d301eb8ffade42 +size 115571 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aa50505112770fec77fef7876472cfe40a011eb7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cba5802db6482a705bcec4cd4e007ae29c5a52a3e5b72b0f8abfba6dfc9ee4cd +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..078e1d90e6d5e32ab799a5b56b799183edf695f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b5cd8868740b1a8ff8e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff3118938311de64dc3042f8cc8e41cdfcb50e6122d60325a64500b8f3a988e +size 182701 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2149c8b65934ee41538f0ff8bb1de51761e3f668 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9dcc2d291a29395a7d3b0ecd3c329ac16dec5a55d463e1547fdacbbde664e4 +size 10010 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7dee00ba4c074cebc719e028b81cd5a2fb31505b Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_7c7c536a078a2c0f91a7+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f42e36fca152cd98db08f3a0b190d6ca2b1b636a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01367b6b5a410c495ff9d3fa49e16ba09bb17b22715607ce41e30dcaf108db9b +size 87577 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..255fb28b22bbf89aea66e2d6e6f0e36415d7a396 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d17be30edb500bd0e79+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55614536bfcab0b8aa8783521e0ebffff16849c91730f0a25f0137121fa6b906 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aa59f323cb2a944c811131f19da4b3762461443e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab022b86f9fcc17f6f5257ee73c050070878f0ff17329d5f8568376524b746b +size 2547878 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8650c92905a0f90d1997e449aab5c0da4f1e41a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c030b0cc1aa42b6fcce4a6b7d27ff80fca41e934c4bcb5d1711edcb917a8eb0 +size 6974464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cc6ab0841afe21825646053248ae83c90703e8c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7d97d3b9bee047397c87+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ecc5b043b769824c2ba802e802cca42019fec94cfc027d6923eeaa7aebf5a3 +size 7147636 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68c9081e2c9fd41b4c228031d5ebbfe2bcb043d7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e1951490d49fb75e778d7f8c0e52948394c03460591978b4cb220979b5031f +size 1493793 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b2dd9a889b24230e2c3da6b02a7993d6f57c0744 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7ef58b1e1f63c382674a+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6f04496b3f2a63d65ead03f246aa068f4d05759f4860ffdde14f8d09a18fd1e +size 37458944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..518f11935a993586e9078a1a25a4ce79406a6cdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3b1de1a3e01bdc313f7f3dacafdb718658a6f997794cbc7be8663052a9d3788 +size 2555184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c61c5c1ce77bc3832a263659e22d35cea869af45 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcc62499576d70a546268e447fb45748c51771d04ccef20a6dadf097c1ea8050 +size 7046144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a52770fd57f8939da0c904c10422429719331057 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_801af15b5001c885c608+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d938122f210a7dd80a3749a9db583c9be7d15f8403f566e588d53bd4a0139cab +size 7219431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0b8fb4b3ce39ae848a26ded374fb5f358fa2b53 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69fab159e3098155acc2dd737cb7adce6353a1c7bf3bca3f51cff88057fef535 +size 80261 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..87d7a0a5bb0c80f200f3850ee8b3a3dcd4fe2c90 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_804dc8509b11b248fd01+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3154d6105d0857d02a151e1d1a162d16f36348e806cae1ea0e15e14a34febcda +size 205824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b289becf98c29c0a571f6bc1f15778738170ce13 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:878bed2983b51b805414d118157372a96d926a5b9ea4bd8eb85c065ee0c1d8e5 +size 1268268 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bdcd3b7d06894abe5e123dc970e5a6f0c0992dca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8118fc175fd139050980+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7143807111fce915902dbd79bd0c74ce10033a6cc83143eaa2e3baf996eac7e2 +size 6401024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c46d714ccce964090191a0b0346157aa2cb567c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8454944255b12a861559+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90678240da35ab496780bc71748aa7fd8b09f4bb2e2d37b927a085847851a00e +size 523648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ca90971c7cb3ce9400fca9bcac1cb70f8bc0979 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef04c56dfa88ba3744238690dd27414699ab135700c99f5f366574da1704460 +size 88686 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07fa907695665cdeecb55ac97dbf2ce42d5b2634 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b96ecdcfc5ff6f5c8bb2c6c748d4dc4fea4c4013f221ee4cbce75c1367605cc +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..3e7bd1f1570a3261c3e5a4f83bfbb316c9dd5358 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_857bfa5db7b34c9a9421+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a96e164632fabab7abd4e40b82d9ffddbb90bbb3261ddde437c1e7bebddcb01 +size 247153 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7859f368f2058def82c3962b734c4d252223c7e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb5699f2623ab6ca092335dd914f1b12c6e22ec62337a8202d8dbdb79fd4078 +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..89bd8462a40c976e769d73702f615ec0bbf6998a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49054c8c97e939c851e44dea55e571f5ba6a5060888a4ff8183b161b9eeb0220 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a2115495a10b332ec3380df82ef4b56570029265 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_86c70bcc620d1fdc9c0e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5abdab3e84c6531e5571396e01b58b3bfb35fb7dec9d21870a04410a3f7613ac +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..afa3655aebf9a9522cac8a1712b8b21d28b12107 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89848ebfebd7d11a23ab+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b139cfa84e4179f620d157a2f5fd2a271bb7453b0e7113153fe155b50ad1aed +size 383578 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c75a1f244ed476853d00d7abb580e574e79e908 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2b4b0d783f9c34d6a5059e6bd44a0dce7a103520fdb3a1d8ee19218af6c5e4 +size 1325710 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e13fea94489965e8bfb4fa60079e8feedfe7252c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_89c0bf163fa9c488f5fd+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998a96b2c80bb5096da6855fa8ea1aa02cec6b2e21d36b2b1ba47c73b44492f0 +size 19969024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a8543c3726035644a9abaa8f663d4ebf7d9aa85d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ff809696f5c1785e8968f71f7fd539fb4c543dbf2db678d3695e47008715b8 +size 114413 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5bc95a4d73b1e350690335cd902556c7fdb52c4c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a721d51fb81f729b755+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9cda3ea9fa1b5d0e7731ed35cee573bbdc6a3feb1a6c961073d021375fe1744 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..185a89df3e63dc9e467cc590fd49aeb666acb26e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8c59a264b46ce4b30223+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32207d864061997b733a99dcf1382daee8a3183eda3ba816c4b7dff82bab9fc2 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.hlo_module.pb index 2d39455bc77ed3608a56b10208b69ba149becac9..62f1251e44ed207300b184f0da08583c9061a879 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:700b741ab6d230975421a5f5586b1eff563e80c3cfe196f38128861d87b2daa3 +oid sha256:2947ac290d2cd5ee883a3ac6a70100840d60be5d3023a198925ee77c9659a5c9 size 647962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.neff index 18ee44ef0e07248b2f2fb7959f200751ed20a90b..cce3c61cbe660b3328dd455ce1f9d75021f301cb 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6a9ebcb7f06131fe03ca82fe43904b87d9d959c9692ccb29e8b7b84dc619e99a +oid sha256:7ede75e853a41a2fcd7eb69b4808a6532aca457b22ac06d93cdc82d6a6ef02d7 size 10650624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/wrapped_neff.hlo index f032167152d0f1f8010a3a43dfcb4622fa0a6133..4816f328d5168d460334cc02797dde2635a268dd 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8d4a4e991c9ba3c051b5+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8b2faed5c11816517fb12018461300fbb1b65f248137575e8476155429f5009 +oid sha256:d7e72eee1aada03be54345608adf36a2a14dd4a09e4ca74b0b4e1f43c48fd340 size 10788002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..98b91c77e3f923670f46571d8ef0c24b3cdcf650 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f80fc9d34da45c12058a4a2e19aeb07925dd0308836234b22745b6f8c75975 +size 112683 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7ca2c69a7dd748d7bf5c1b9a7744ae36f4f5b47e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8fa8f6042a6e8b712a5e+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc96305ae41b8647b53b4826fedc3f7f9d4cfd755e88955e88fb18ed777f830 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..83893be71d9728f4d1552b7d7238c63238e4e5db --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_93df084c0ff46a4171f7+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764b792a19734d86d9955a6731822b4a916ffd9915b9c9d78247c6c0bd628d9f +size 940070 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d71f3b29f54c7c04856a9c56a09ce9e0b18d7373 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba65d5335646539f3aa28ede06c54054cea89d33efa6900bb95e22d5e9aeb0f +size 53519 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..61226b4a0a42b21eb979000d4f07fd00ffa9323b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_94ba84f59b341a98c066+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b3c90fdb13ea980144897c0236d2d8432d49c31695db60316c9665c66bcd39 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5118a49c780d68596c38bd009a069a8a49d27d25 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978a1d4ca28e11a7fe66+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b10c19e280101ecdeb0d8603b0dea7d32697a82850026bd9f36a19843b7acb9 +size 523648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f6106f721a29954ac9072ffb0652369d4a5fde19 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90575d8ac097c7b3d12dd9134f57bf9eb110b7fd58639e5e54ffdb0b7cd12b06 +size 115493 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e436dddfefd0fe4f58cccfc47bc57e87a282b2fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72abeacb6f080a03ae68139dcfc5cfe2e1df870552ef49f7adbdbebb55d392b9 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..268320b2197d4fc0c6ab5cea988bb65ecc616bed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_978d77f879e10731219e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0255462fffc4f5ce8264ef23982169dd0ab283292314c50b8af5989884c0123 +size 182701 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bc6fdd05ac93c21973f441c388a4daf857f8e8ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1919885a02be2064c711ae65035e0c7f1592e09f4dc17fe27ee5081ac6ed73 +size 81115 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..38f2981864713c370d63d29831b928f12960bb98 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9840ca94af4106910064+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90cb39827c570734c13db14880848dd9218398c05064a113856d7c86f3de5f30 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47c37ed63a214364892fe16dbb18a26acb5c9542 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3bd2354b87e72d380a363cd1cfc99ba880da6cb454791b59fd1278345941946 +size 7011 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2a70a7b4fb9359b170c8f4380fe5d8cb4490ea48 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9a042dec7c8f1bb9bb73+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..170e46b40ac0b56c809e91059c3be6e81be09164 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0116aa51c873801bce62eab5b30eedbeb02a12112baaecad8ff8960d50e370f +size 10013 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..02f2a5b21b9a8d34f2f484c9e3b6eadd815239f4 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9c37025554da49ae454e+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..60644dfacdbe6d926a447c1d87fbeff565cf3854 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f0ad95e983da77817794f6ca8727a7778343499256f4a2143a39c99c22c4d0f +size 589112 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4a062c6caf83d61d00ccfdfe9d9838d5ec90710e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa5a86dead31163c2d51bd9b3206d59eb8f57c279a9728b31b514c800d4a5485 +size 1516544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b299a2ea8c17f6087a354568756ff706b9d5a9fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c3b0ea1f43b9125df5e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898a0ca136c13f16c9f6cab2a9620eeb4696c2898125092fd458af2557ad1d3b +size 1674868 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1de18ce65c36f490eab957a2c042a0c7b65cca48 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9e8dc33d0a2e7cca61a080629c476153085eb635298213f83b262295ede1528 +size 170118 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..db6327536c2b4126e0265d56b103db542fccbcc5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9d6c106ee9d7efd5ca7bdc125c0da4f20a885d28436589646a87e7069169f0 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0cf2ada79836d737763456cd7e17d8b7e8301a82 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ceca1f24a9c467682e4+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd18dfae3e4b0dd6d82b4e16899dc55080b3a8037f39ceb6ff8c9997b6f0714 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fe80d5b7baaa3accb5b439ca0b3b2634e9f287c5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ee0837f51cca42a03aff9285ab1d5368e28472e631809ec21ce3f777e8cee16 +size 164777 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a766d9b3537809126f20744e6c20853eca18fcc9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9e250fde8dcb316efa79+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b8c4603124eb044be652a39f22a41081ab3fecaeb2d06d068894d993dfe4b9 +size 1393664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f4b3c00814a76cd2347c920c2aef5534c09387d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f72b9c5a8d3e71a562f16bf240e58376cc17f16766824a77ced0e1fba440606 +size 161606 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ecb5b12fdbfd8c0a572647d52b245a509234ac36 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a113aac4f960a5da051f+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b829001da76c1e08c005dcf50d602be56661ccf2466909a95033c960d9baa22d +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d66a8f9dcb1ec3f00ef3998d1694bc6353f4537f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7ab3e9082c00c6742ea4543b4464c593dd80bfc77d8961bc3da966750e76f6 +size 613271 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3a472c9b68abf0c292c123739184e8a3a7b224e4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb1c412750939c7d016b6554000e73d83c2b0878223176073dff6a2a43de7b33 +size 1117184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b38f412fc0143206c587bca463c49843fe95d53a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a196e11ffd0a26eca3b0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89db4712c3825ebc994e14e872bb55d79143d00d2fec9e2ab5a2c80920d8767b +size 1262210 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1e653f5abd669a492651c4f0f37d8a1f73e1b34d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65f5fcd3424c4f630c535e174ec02e8aed0d785d1324f9cb78490d2d063b9de6 +size 50076 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4539901218d38e30c26dd7992d202a070bd0bd32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a24e3ed896dae389d4f2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3cc16666ed2e0b5ad3b24c55223a34599bfcb1693daeecffc5045480f65baf +size 144384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a513b3df0afb58305c62518250a8c50ce51a5af --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9f964194a0b98e25fa5175fd91c4ddcc262412711b04f4e54c1ac4de3426c6 +size 78990 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d1556ccc62eb795958fdb338c83360c2305bbe67 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6bb1ac5ca49c4f28031d7a0e02f390b5b1186ecb7311b8dd3d91bd642ed6e2 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7c35ef18c2a3b5b814a20fc883b2cea38cf33ed6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4b551d188be0a834e52+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226fc6d78d450780de5be7a3089e9b78572d7506fef26446114df91db1f72579 +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..edb26c200a329d769e69665ebcdf04ceefc7179f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f210b752780a23e9743882446a5bd9f555bd0d3acd713f29fd06b95a12fcb91e +size 175310 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..633b224dfb5a659cce20778239826911e1fbc6fb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ab02def7be45d4f1f3c72e2411b2e5f00c2d5be315342a6c055b6ea6479a13 +size 472064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..653b99cceaff36a01ff5963f5ec02f529119fe48 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a4ec5c639ea6c2922365+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d106cc6e7f4e1503e56906e88f89a5c8859e45cd1ca07ffbc3ed85c2c2495ab8 +size 479903 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcf1fc29f46bb5ec5661836e62e780100263b474 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522ff14dca729d409697b23ecd4ce266d075658cc64ae7ad985da05ca341c1a3 +size 698790 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ff300349243af0cb48d08039d12bb8e60eed8349 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7fe5283b8703a112b10f227326b933b63463efc0287cef968604c2f9debbcc5 +size 5817344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..888a3f82e24b9444f9101f4894f974b7fe643c74 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a57600b672ea2ee05d8c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e19e9bf0e5f92302db3c37e2693734934d72608a6093bae7c4c20dfffcbce9 +size 5947452 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad90a3e37251d3051e47f955932b35bdb1fae4d8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9bdfa586e302eb86f47edba9519bc8c290973b14f4be2b802e0cfe37713c7dd +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d2b63d511dcb6ffb3bd4000599b64d01e113c217 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_a74277ce7a6cac60e3fd+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a783dc225009434cfa17af43a37207cf73886937 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a9dcc9793d10402fb1f07fb45e8939406207c09b3c77e7632abde04ea36135 +size 143137 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..744afa14c3770755ef1955097b71b08977f96f37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24a0e784a103064a75a86db0ef3696a69ea0f819df5d54e3f195bedad2b762d5 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5d2b7be4ff749a841702e062b440ef9f203221e0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a75c2e49085ba31c8fd6+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd1d4e9eb58a8fdd2b1387f257497c8beb4784d146394bed37a6f0602d4b8eb7 +size 195507 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0f25626dfdfc0aa5b02dc9adac5b8ac47cb6f03a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede10668c5e1394e8759777987f8a1cd4af45ab0df63a548f344ee899a4ee1ae +size 1726417 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5d22d7da8494ba89d2d4f81e3bff216075b123b6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3ace989ed25ec58a0ba90c5e87f9fb2f6ab310f76c628966da756d6d240633 +size 3554304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a00a1cc9b67b5b58132ae7e7fb9286cf16614518 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a87a55960cced183cfcf+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2cc13201517e9f3c319d32ca46b22d67fb325dd447abb94fa62f4f3bfaa6ca +size 3684286 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff22fe07da4399eff30c425405efbb886972f137 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be00605073f4db3fd213a8a27bbcde200b67212af9455929bd71e8176f725e1 +size 161552 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..717dd9347210c2f859bc66408081f107141393da --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_aa05d84ecf9b27f81079+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce4dfc5bc60ba9f8a65d2f3835d57b8c63cd4913a003cb6d0d6ea54e87749a2 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..559521e7d7c13bfe1690db1c57744ecdf497301e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50098abdb580db6b2a8cb129410493a5750662f6e11632923aa4bda70f832650 +size 131781 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d3db94a76c74abe46fd9754004e0bbb2e1e9c2a5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab079fbd60921c54cb87+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af0df4481d142b8c61eece507964330d7aff645d455ded810f45864879d04e37 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..201ea4cd3a10c1924c395930d3dc74215b2299f0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a6bde87a50fbd666b5aa4727084b2c98ec8c749afbd08153f24987a705e6d8 +size 172368 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b6c02160325f4494607e3e04b8a93b9aa9e601d3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ab111b44d64c2153350e+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9656dea46d0a4f1476aaa0be17f352807a5a6aff4cb71c6ec939bd5f76e06f +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..23a19bfa109c80b40ef82002a5febb55f89fd404 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec031961421ea46578adbf00d706c9736984e44137dbff9d84c318d179bd4c7 +size 159819 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..185f876f1979dc969300e2dcdde76ffc26644e05 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac7123306d7d1dd877bc+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce79df124530519425cfe3d7c854fdef43b6522ee595b461b2d604c30651736 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f52d2c8cb1efbc8ecc84332735977f06bee65938 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f2dfe3748b10aadd8a7c8982c2498b56277e01b3cff36d332022d4341c9a16 +size 698092 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..687b7f5cd0cce155a3ab38db437237d24321ce35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ad1c55475518c7abfe46+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d813f314ff1db4e4ef1bc06bcfe190542b692bdfddf56da7fa877bc2b3f97f7 +size 19917824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f22ec6639fa8c491d21387672373c636841aba93 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96bab248cc7e75d95514bc891d7aabb2068f00a6e639bfddb678811c7464315 +size 760668 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3a8b86a80c67174e1807331901a162b442e0e4b4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b0335000c191d09c6572+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9844422ac9d1ed864e0affec4fe7323d792415a3c671c9d8bb504e6a3d4ed56 +size 31663104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..45dfe691502d361a203b642c6c55bcf9f53b0088 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04ad5c157e55310df86+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d6e3af8f28f0c6f204be1ab259693c263e8f6a81d3ea4e5c5fe980bb5a6d8e +size 2494947 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d105a6edabece89c48275e7736772e1be21fa945 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b136461ab5b9b9010f35+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea86f9f5daadfb321afe24deac3b0d3813a2fbf862f00955a629350f345ffc20 +size 939590 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4a135f56ab06aa5f14fd9e1a37224bd73aeb16d7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa95ac4e73f11d5287729e3d53f8afd97caf8f99446ef779f0b94aae920d3af +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..164fab078082c968ad5e828cbb1302418447f959 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1503c4793f96619ae32f9b83c56940327c165b093bb9a4fd10ea68b26dff3944 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..bca9a61167356d6d0a8338888d6d5d72591900c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b202d269501d3ed0b483+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:339c6006ae0a087a1369554e5369992fe0c980ff8337431ca4a29ac1517bc26b +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6c1e5b5dd1b766d846192d05df3d7edaa7eea872 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9635c56bb978143c80956f7a34fc573314117fc043d3747a51693ac32078aa58 +size 576230 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a31ff3a21d19a78ba66a77fbf0d3616822d2b13 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcde9f0c96ab2eb517b4adcbe4e86f7350e6791d925d82d673683ba0ca0e1523 +size 1455104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cc17e6ea40cf336f989192ae759f4cf6eb97a3c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b254587b1ec9eec17e0e+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7384d249ffdd8b33de3231856ef834e137bccdc3336bbaeb080d91b858186f7 +size 1600015 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4acd04ac2dcb327c4966f318ba1dadf537240257 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22818ba046b107b573affc6921007ba42a0f09d420366d35ada951a6b3f4f681 +size 83712 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7433c69bd3745cca856c6c4158a88518a9d83df6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b5cb392eb50260fa24a3+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc685cb7b1d8c410900db5fea2dfb6acb717cbf289f0ddc5da0757e4d13379d +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e9528e8126774df5de46a58299e455bcb9fd6dc1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21343b8fa9351a01ba1d57333183fda23fbdfd33008254da1bab1cfc0831c0a2 +size 1072456 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a9809cb1713a0286f033d78e246b32fe8b2f21e8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b73c22227b6353be7e03+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bff86bf0062df499289ea3c072e19c4f613eec5e40d3871b9ef605a7434c32 +size 6790144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e2af9758223fb6b0e190a0a86f42d0c32e070ccf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:354d5757175e21d39a0167018745425af6b4112b9b6c721c3334a7a305f7f47b +size 1506082 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..35219e8fcfed9afb681be8bdd7e80fc6bdd3f18e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b7c36bb23a045298987b+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574da8c60a5b8cb9f76e011cdfb8dc7625067be7a673d6062e1009fc562b837c +size 37458944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a69f8358f41901662c39eaadd5124838d69674a0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107d620aa0aac148e5e9c5be098a4c139e6d7d93f171c1c04d89fb134b5a01b6 +size 159842 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..421e5d6a5a0b45a119cba0fd8076ff7bbce0d831 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b8568f949badf968f33c+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b90ccae9b021817027f4755168c6c338936817f905ffdfe826b946f35f3946d +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3b092bed410c6797b9f270b5623407482f995b4b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cee2d35f3a2ffaa555bdb67cbc9f8914afce3ec1209c1d68a86841fbc69e38f +size 83281 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1dcc2036678bf76964bef24ef3aabc923a60c7ce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d17949c26936f8d25749d4146190b37faeaf3fa5ff13631ebfe2a919dfc45cfa +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b075203ee721aa0476daccd560d350feaf0afd85 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ba9e5f7f86364cb7b38d+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7869fdca9c9fbcca495ce0d3c309a6b02af9f4351fc4ca9dffd6e41bb58e0e48 +size 193114 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bcab0eb40b46079b3fc9df7fdd1ba847fc99be73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5fa6d4b2cde07fed55f8c1f27b7c3ad2137ff2a1268212cb379801768bc12a1 +size 44058 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3168b5c45bae0d014f8dbcd05f4deb321a195761 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c39f658933660ce1ea6e4dca694416613eea473055a9ae8eebb548900fcb514 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..27f6bb1422aa10a715a051d116d61603a2ab6ea7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bc0dc6318052d18d4f59+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457f6a169e448fa7e1b9b0e9f49071213738020c0ff233df904db3e4410a6319 +size 182770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..793c42af9920384313e21123683cb361d7c98548 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d631073a722fad9a871e190a12264a286b62fa006a0db13e3e756ac27bce1ec +size 116998 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d8820d3fab8a2bb2188244ac7716bb8bc67f3ee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bd7733fff1675159a47f+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc313b6c628f4ba3273351a37c6894428d22d61a77d8c2ed36106df5577a5dec +size 3953664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..262e7ff592cff312d593179ce8a4d23a3c82ee82 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bac205fbceb2d5a93eb3eefd701958ce63287b0c5bc61140794d81fe51479e96 +size 217275 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0af8259fef7d6f8f086ccceabf8f4976fccbeec1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bde7c6a8467969595669+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3290622eea12102a0f44968ebe5c66030280749b81c767c4ec1262f4d5674e0 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57b1a2bbd4f52c75f8abda3f1945147189d5c12d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ffadf090f1e5b25d004205a163f73aef66771ba6ea46cc1d4c8fe62886c7f38 +size 954337 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..918394ada56c1e8cb60c91019a6b57fd85f8cdfb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c69cf3b769a6aaa7e1dd37d6f0b89b0d3ad47f495c53daf61bbde90943e7dd1 +size 1526784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5736884c629aed9d3ad84d5489ada80d75e90d34 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bebc77fa7efe716566fe+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c5e2290eb347e96386b6e03f93c43d03bbb81fc3ece691ceb9816dc73e88956 +size 1613754 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6fd4854a0cb3e03f5d74b257c45cd527634dce3b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b636c76487a38551ebdb96f01c35cbf5ccca76d407743191522afb4023a8ef7a +size 175310 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e6565bc8fd8fe096e0abfecfa7aeb76c60cafc03 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b953c46d63d9118f70279743f7d72f4673b0e2ac760304be3d3177ae4e26ffc +size 472064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..6d2d919f1cbce4494da4ea2c6253e217dabb669d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0cc3ee540cc1113e295+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b725ce94e4ab1b3e001c1c315346f3abc0ba8e68607f08eeea02c093bfed02 +size 479903 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..59773b0dbcca820258dfe53f95e6e76ef7a3b0ac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565ead2c15f2211ec08d6a350de1f40e787885e52a7c79d1670d5c931d2463a4 +size 955802 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8909ca101ee015f0014ebe241796a9e0b38da2b3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925d090722b386bb8112c2111bfaa41c1333e010419ca697f348ebf903f7e3e4 +size 2161664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c1f0bc08a22725f5ddc5271f88eaa3d51900a227 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1254120be09b5712a5f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8bd6d65f6730b8918d90fabda6238232734438b0d07f9dcb35c194bab20c49 +size 2231293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..cb505de3daddee73a0c6087519f60b8da0290c64 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c129fb60cc612963e5f3+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d39aace1d85f74ce8e31631d1a5933a0dbbf064a4b86ecef8eb0848b0b07bb +size 940070 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..50f1095e9b415fa7d46fcd5452892b51db57ea3a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5abdd863dcb22c3c496b816c5ba01b1e289d2de04c8eeebeaa6674b00e40697 +size 221211 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ad8e7c55401eb1b47203ff1cc2df107e718e481c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320fc43ef5373a0a794f47c5ada0b23ad969dae6668a20b4e0c4014956dda0e9 +size 328704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c6e35fc106189cacf185113fcdfe6132e4def4ae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c1a28b35c9ef3d5e39c4+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de0911fc7fc3f04d5e80f8a91b2b73e71447932cef89d8d51d7231261d6dcb87 +size 340002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..299200b7d9a3385562f226b4618481eb702e2893 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f628b39578f42bea77d256318c29313eccdcca9a95edf5a5257efff3fbf28a7 +size 189416 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..aaa35c7c2640ff9aa349b59fc013eb39f104b288 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa09df687b0d3dabdde9692540909bb82330a76579f8a2674978f15f9145ff0b +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0e7b97e5ec7e2ab60f369092ec407f1d02c29790 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c2a8fd6e4e6bd9ef3682+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8941603b520a4c3f471835f80a2a93277104ec394d4280bc0464dd0e51abe154 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e1e4842fffec9bb46a6eee13ef8e68ab78983a64 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b0fae1af570970c8a697b10a4dd46f954692bbca6671f769bfd5910aedae839 +size 83712 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4bc6de862cf75a9e3c20f5b72c3453f16263679a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c36623907d50079ba312+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f38afceaff2b3c2d0a410b96ba1e878d6f165caab12fd62fc251330397daf3 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5ab862225ca8ea373426227c0b9f367e8cbe79bf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cd2d3ea6c62a60a6971e19e33eeb78216ed491e2edc379834c37f84815b612 +size 823276 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f17526fbdc16f2b9798b407a653827b9b58782c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c3dda8a57464a1f8b5f3+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f812f804ba75bf853ef8f4e8259edb9f8c3f32b4b34a6d928fdf58d978e33d +size 37428224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bd68eca5e9421d1ee0daf5977462a28aeedcb9fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf81ecf8351725012f10d0cdaa9437d8aec1c0157becd0a12928b5b090b357c +size 448770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9521107e33ab6d9c52de72f4e158575fca52f298 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c53f552d07020ec60927+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e501bf3300bf91e246ca573797beb2bffe3224c20c45345dc65e47b61b120f +size 32369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.hlo_module.pb index da9e9189aac728c1827512412445195be65c2b73..91378f0689372a48df832631547092149d85d73b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53cdf4b2b09d422056e0aa3f73ab0eb4dbdc6af4b2b86f36def704d88a30af08 +oid sha256:0eef3d5745062a23f742c2e129d47d340e674969e960027c05b031b0f182339b size 1095125 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.neff index ac3b91a1b5a2d1de6b055b8b2c9061671f414a25..68270e83c96a5881e65b16ba93ef6bf74393585e 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8233dc59cedb034fd26633a075139db9dadb435defba61fcf64cbf119ca1447c +oid sha256:75ac140b113d7e582a70b8c44a57cbe8eafc15c2fa70e34a660a421828e6706d size 4875264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/wrapped_neff.hlo index 0a492b734985ccafeb9d9c2c4def755def4c0173..a7212719ed84c3c9e496c8d08048a763c40de15b 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/wrapped_neff.hlo +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c58c3b1c9745cdc8b7c7+165e9558/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8cce84c8a683edb5ca225a5a30e2befc2c6c1d258bafb9cef64d74a891a71fc9 +oid sha256:63b66263cc1fac7c3137419ecf6d6c22d0d33944b2bb6ad80a059a672a54cef4 size 5048309 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..848fb11ab811a54e801c0024b64e66a25df51859 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b19dcc7829c461191828a0ed051c9e2551841d4437d92455001c14039fcd1b6 +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5fa19c0e0198d5689518024144eac10bd6a853d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c6a4c0cb35e7a906a9c2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf90a43a2a22734500cdea09cbc0524a690790b79a19c5463f4063608d2773d +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..88035493eb87f34b119a4665d937e6a27e17a714 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9e74f6e05b9bd84d59addc9f34ab7eef94e63734f2f1114b01407379ab01ac4 +size 7011 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..278716d1d23a8f2062aa063ac26b08c0fee8704f Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b1afc8cbed0b2dbf01+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..271b94f52357c0375e5c46551885a4c8717004f9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7ffb9442cb991df71e1e421a445c3f85e51bce2cdd500ee8df9c57d9db2b12e +size 823706 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dbe8cf116394d7d01a81fe14e83bc664591af254 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c7b34d5943a8e162e192+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2105cf7b79efcc62310f75763d1554cebda64cfe44cfe0db6a762ac6f7c145 +size 6237184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..46928ad49f1efd6aff572d743431e282be7ba944 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36962f23ac765ea4fa33e2455b1bd08f06ff25ef05409e22acc400481427df99 +size 217277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9b877ccb1a53d3856f77831aff30f33941666b73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8004cdbb3824a472562+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cd46d0cea0e8db30f972671826671caf8dfd40a4f4e6bc14887a59466e7c9a +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a8f2f6365b6f17ca502d74d662e9b72953b48de0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f01bbdf4f06860c7ed88e56cddfb3f8df3d89cfc46dc5353768d8e4abaebd9 +size 83281 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0321b60f0da0cca96385e7871140f08367ee8fbc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202b4e1c2d092a28057805718697d82ef1c078f523e48396a30cc03412dc77a0 +size 185344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b084aaeb60ecfa034925cf619e75cf620ff95931 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb76f3926e2853557294+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ce70ca44d2d776452aa3612ba5ebe0927189cb677592ffcabc09253868b534 +size 193114 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22dcf0f8c79ea418cd5ca2568a4ae6f6315ccc56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8919938a040060bd5077eef2dfa9d7707e8483b155a3cc2449f75b6e5ccf99f7 +size 10362 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..73a459f32b950bf6b4ddb88c31cdf7d77951cc00 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_cbd29f12a931a6034fdc+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..352b248b21ba0cc47d619c4b1f971076cc4ae699 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf0ef62a7a0df517a79ae7d5279b1a01de6271c2e4d83fa8353180f2791492af +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cfda60609ab1b5a7894050af528fb4a3c2135716 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e2c47cd5166e2d7503+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..42c585184b3733725f380fe79262b336af4fe278 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c889e039810569cfd70f898b6a7466f59cc17e5e8472f18c8e222208ee4b7549 +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..80aeea005757697033ff909e95413d701b7c7c52 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d1e5616a8086d5b15822+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916ce9dc21776c3c69d1b01cef5e1df9dee022933aa0675b316814e7425ef137 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..da5ef08c43ec154a43eab7b5b49c6eface17a03c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07d87f9eeb300098238912e78ac29c165c06863590b81f2dae1c7e037ab0b2b2 +size 218822 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5a3d7521d5808640ae508b60273e511debfee2a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2787d322069585a8ab7+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8221511d51f857152e2f8ac0ba3c90fbc830139676ecb5c4f861f5b4f473d3 +size 431104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4c64e6101aeaf718753f7d103aa694a4991ffd68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c426b71115730b23335e06509feb8597628447db9a3081d8e285910e962c2f7c +size 172361 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..434efae57d7fd2fc5a8f1905cb694c3f15a614aa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d2f3b0359d66bc4ef0fc+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3a48f63bac534e4a9cc10e4a0505cc4487ae7551ae02d3222d7babf762e48a0 +size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..61f4fa8fa48ba12cb1cb5e77fb6d5ce569a19424 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3490ad3f154cf17e30d467065e0d249bfda6d6ad064598f06cefb0c4ca4072 +size 87293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a19bcbbb0e4b6ec579b7ed7dc97048bff8e8b442 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d3986b7b5efee5f065f4+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1276d19ea2012e1c74fbd66b9b7fb2c4f116c5d75822dc9365bda22b1d6bac7e +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..250d2041988ec2e52f08ab7370dbc018a1eaed63 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c6d448a38db62888ceb0e3958abd178e45b0ac7061ec77084b6b64cb3b156f +size 114413 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c3d227b69d9042b935404ec9b80cce1a369b2752 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7c9bb41f28fb7099dfb+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0192d52bbe1aaba917efb1a68f8b1a1243fdda4c21e22b934f805238561cfd1e +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9b890cb108d929e4fb902f3bbb5edd67a979bacb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d7180b21620f00ea79cdd76c586aa0ae6d7ab9d9b866d076a3d6065fff8ca2 +size 374859 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..94f4fafa909e0897cd77583f945f5db39b53b3ea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e80fdafbc8c6ada237dbee1311b24cdc6671d9df0fd4255ea2fc6fc4b770a54 +size 2151424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..de37d7d1c2c4cb9ae75d0a2e4feddf793836e982 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d7e2548756fae2419754+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd2f83046147a179dc02cb331b3f7296c47a694f9c20905cc87b2f5a7f3b0fb +size 2221053 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d84f59502564753faaf1+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d84f59502564753faaf1+431f5505/model.neff index f308e205d1814c8612db9f462ba3379f8686e4c7..3c05bf3ac0368fa78e3dfd1d1c9169970c9cca22 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_d84f59502564753faaf1+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d84f59502564753faaf1+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:04e71485e61807a057e4b2b587d95f705f0151cffdd052cc9c7316b7fe1eb5e6 +oid sha256:74649b3365125e4880c8b7179418da522aadc65756bd8207dd1b7defb80d00bd size 2243584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f8df65e2b544fe0a42d308ba12b0dea8bc24bd71 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d24582c8eb37b805e6a5cc8f5b159a157c4b83bbe2c83ba5c23361fa21ba06 +size 78990 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..31683428be996a6248e2b90cb47edf60b7c68f1b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f98296b36d3308922bdd579df926128e4eec7c5bd72882e79c1f4f2928e73f2 +size 216064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..64f9b0c8a8a03e275ffbd7c8a5d13e30f0339b5e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d8f0f9b8ac103cce2472+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157c688f85fab4dced0625409b92672f8a9ab582dac68fcce266bb69b80cd43b +size 223770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ecaf7b9a66be01cdd24e976fe811be9ffa0800c9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d96567e523bd6df1ddcb+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6992afc1a115ca412a632149c9f09bea1fb5b3e20c4993eff598624602ca2551 +size 2403893 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1502c83b4525908108b326014103086cb7592912 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9ca4b443203faf696efa2b1a4f86f6dc4490e6252e0b6bce196860365b3e7e +size 1315599 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5f50d7db161fec741bed9a7a1446949c81801a0f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dbfa93fc61816a97047d+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bc70b1da67a9d49fefe8dedfa3c1c5c53d1985fa9d33b4a017279ed7254213 +size 19969024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3039bf766667382752c7f7d7684bc15c0a4a2c47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a81fd7e46dd4d38c0a50b96cae97b2e1eb58f0e38c534c4f80fc01312399ed5b +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c134d15f09585c9e3d0b68a03d0bc9a60b319ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dc869e91ce0855da0223+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31e002cd0ee4973687f5a973f793fac9fe0d79f9e71c6176d83e632c2617ef0c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.hlo_module.pb index 632a66f960df8e433c71c8d031d49862e2109f7f..97da9795a4f34a2ade1fea457e8b9748557a0d89 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.hlo_module.pb +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.hlo_module.pb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f1471300aaae4d1147af13017253b0d3b4b463f368087b93481273553b295c1e +oid sha256:1c9cca486eab4d6045bf1d2ae56c02c9b7cc9027ce7807aec3a99b6ec062e26f size 835006 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.neff index 83ce794f6d357df4231db96ea3349fe4328b8471..92d7423fcc26ba5a48f7416b61c4bb3699bffc82 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ddd8df8ccbf7e2239d19+bfe5714b/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0626ccd085eceb184eda7f0019631866b9ba673e4bfb7e63133a7895fd367d12 +oid sha256:1d80bc8aefb2c260e890a98ae4bd688260b3913b8225b5f70b5787c485c20ff4 size 6216704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eec7fed12e5600f29e00498508c27ddc56b7a787 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffa79e252b3ab5a178f15d5680b162a5ff48ece318bf95a4122231f100bd639 +size 149912 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..cc5ec1108b5c0b28fd777e4e3c1299e8e87ea82f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de463290b0cc81f3e50a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6295644bdf42c043a55e937326ac1b3398fb9a7a62ed0c5bb03252d69f5884e8 +size 594944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41cb08fa0359914b160d82e2446d67f218947684 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f1aae98200d4d50000c1835f872e2a8fcc1bdbc4bd1f0c5ef22d6aeb9e409f2 +size 2539337 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19184574058d576534fbbca0a3aeefc53dd8a124 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5dd32130333bb9badf14ccddbb2f283c1a304e39abefd966dfa6469178af61 +size 4957184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5fb59384c802d15d6d6d410853f4c6f8804579b3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de7020db3ad42153d0a9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff66e84f47f16a60baf25ec17569edfc20b985356145d30e4cf979067cbb9b0e +size 5130344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..157c6134fb9836e3dfe38913c20059ac9dbbefc8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff33db0f32b8ac97fa777866ddb0d0a3600c9ec2dae5f50134e540cfcd4d344d +size 81607 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7e19311b08287ccf9404a513608375c2097f1396 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dea3fa0fa1232db56e94+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b971aa7d9a9389cf21b9af3dfb1d228bdddf84978a4777ac90758cf0cf68c427 +size 359424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..92a0e95b846d1d4bb3446c4995b81842798b21e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51f1a89033c2a8421aa39547944cc7191df407797dc54e6bfd15d5ee70200f59 +size 654848 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b5afb2efb714fe6573ee86388e826e796e510047 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b812fac2d4f21b8312ff2dd8fc25f6da0525f4bed3209d3a8d8d10b35fc9838b +size 10630144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..49a4b8ac648b0ec81d2ad7742c3afa12f91864b2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ded5e72f03277b22e294+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e460c3f0013695f4aa064761eebeea5ced21cf6872e85e4b6efadd65ab196b +size 10767074 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7240dbf02d7477ec60b9c82e9885cf52ac973500 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd125bd12bba7a5505bde20c111a90b3ce268769afd0e48373cc64ee6c7c5100 +size 132732 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..794cd394db27fd42ed1d47be93cfc20418cd7ec8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_df8cbade8e779ffc5703+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a441cbe8498cc150c3da00273f0ecf9fbb9dd2a33602b20249eba2f14ac4297f +size 1281024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..de3a2f1b3b8582f8b2a1dc7684f6e41e7a312a17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5135c15b241fb17fb83d63b0d24e7bdcb550024f19e29cfb9bbb314ef928ee6 +size 579716 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..608f3882a2b987cc71a18b67a0dfbb88753665f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3d956f0dbcc71b727a62b0edb46512ec690db439b8ce4216a6798cd0bfdb531 +size 1649664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..568ad0a62f7df1610dd9ba4c53a13b93037c2180 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e00ea95e26db168f8dea+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dcb01036aecfba082fbe79e6e3b17ac33dacfb81604c115a2debd0de16fe45 +size 1794702 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0be232d70946660c5254a9948d9979a857895252 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3cb0c71f937117958ff0bbf0af012bc7e6647ff92c57d84580b38864a7498e6 +size 1062083 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3871691b2d3da24946fd4a98868d35abfbac9d2f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e1ae3c47bc7fd4336c49+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfb94ed3bdadaab4b7756ea9930c8046e79d0517573a9b241dde695d1fe159e +size 7312384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4b7f6f7abb24c84ea9707614bbb2099f89a54fa8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5301437fe93b82f6acbea9dc89d17bdee797d23c52f693a5fff1dcb7a4190738 +size 580333 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3ebb866528b8963f49618e0900e684aad3cbff6b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456604cdd421ba66c723c922bfabc290f580ec89f080ca3a4a503e5adb14639d +size 3728384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8975d914738b8459a087e357f119a71bcdcaca28 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5f864ff8f5ba3001616+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97b68f28bdbbe55ea60bfb62053a3238ad7680eb7c6cdb6ac6fd2d7e046d2523 +size 3873425 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb5d8fc7cdcd3984e5f83aabd0226b3f440372a3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e67996812ad4b0713c1f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5ec3336a02acfb3c655412a507d92dd3d62c984858fbb2b345b7d0687450e5 +size 2413541 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a5bace11d3a125fe8ae94ea91fb66b5aeb8bb356 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2963952c816a8d685b07bd3ecf53d2c138c5e805c4fe9880400c79f30d8b29 +size 841540 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c3a514f9ab5c5f746ae9fcda96bb0ee62ae63bf --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e97fb518c22ed7e2515d+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb11b0e264ca6b631257909b1a53662396e9d0fb89887e8b2e1f9265c265c0dd +size 32103424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..85b18cad1a80362525fa28b12da26da34c090464 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3804a491c378a04b7503220ca7f721f587e1bf3519572d89df8c1db705277eea +size 178966 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9fbcdf4739189fb44afab53abf2ef9243c8a5f97 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb323409b9b19da08484+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a29a7c929b4d87fcaf139b2c467e4121e16283d4b433340381670e22c128bb +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..00e34411e7501163c12fcab25095b18c544cc9ba --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d27df69eec3defb21e98a4f5b0b28b794d66599f2b03cd6bd3046cd52791e241 +size 418296 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d959ded2dfdbfc87dd60513df760c30283d2647c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:452f051b9d89a1b0785f45bcc5588ba665641a708f21fe60c89b8990b487ce69 +size 502784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..320224da1ca237a62c4eff1897fd35500decaa73 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_eb928a784f0138529be9+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f724e2db52d31ef4fa016cabc471051ddc776d20e5fb2a7bedfed5de7e639db1 +size 571183 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9ac2d33255df581814f5d236eef7ec3255e4e657 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3fd4246fa1794c96c88427b3948545c9ed616b03b550783b3eefdb580b6419 +size 1529642 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..667f86d708d591cbcc0fb70aebff347b5207ea1a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46bdcbe69adfd86c08d08a23cd6c4dac03501cd3267968d4ac4344d2c1fd6672 +size 1127424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2eb8b8248020067a5a1300e93f509d3dbc30a33a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ec98086d86f184f276f1+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cacbbbe1d2bca4aa110cd777cff0f36276d2fdf6e09657c6c072a10157df7cd4 +size 1272450 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7a81f27b02be85dbd25ddeef1750b9c536b12fa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425f99c3f8a0d71f05502fed4ee1ffd958f083be56140fe38719058eaa6a956a +size 67207 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6f416197509aeb3f8ef485632ba9cc14442bf19b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ecf9cfe0aa34e5b00f85+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bc49c4ec99ba04d5bc0e04a07c02b2c48e7f23922ccc2ea755dd26160bec46b +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7ac17f6035702ed025e259a4ab0b6a66e7b9dac1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6adc0afbde64c61bc0c59d708742998d0ed8cacedd763d5b897f26939ef4e46 +size 7004 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1eaeafdd79178ba957e46e0d4193730e424384df Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_ed6180267143dfea9183+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ef8771b8447d48519ae4+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ef8771b8447d48519ae4+431f5505/model.neff index 5ff0daae510280ea4744357bc61943223de7e7ee..91705d0efabae83b39af743cf6a0491997ca9283 100644 --- a/neuronxcc-2.17.194.0+d312836f/MODULE_ef8771b8447d48519ae4+431f5505/model.neff +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ef8771b8447d48519ae4+431f5505/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09aa7db47d73b482a853ae44920428c6d0fe495b6fafb6cd4f86c418c4bfeb76 +oid sha256:39e0167c6c0a99562779178181116e87e16a0370b88bf239b07bba3bddbf1276 size 3974144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..42778a93998798ff44848bdc26afddfb68e83922 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8839f1d793feb3ac9b940c56a368fc9b3f075b4fcce894481a93f32f147f9c0 +size 1702028 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..003ad8e6a64f4b4fcc25ffdf98894d7af7387d72 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:357f47f0d41b27c4d3eef71ee8d721f869a149ff1ffccfd91d14f8958f6a6cea +size 3554304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1938be333368fdc70ee3670ac7b2470e7b4a1b9f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_efd8799e097b083a4ed2+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7099fcfbc10afa56f058f6fcc3286f36d40cae7f7cd063710f41fe1d5d9b4c +size 3684286 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e1271bd4bfc5e07cf5251c33b665f3ee5cf6c417 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2caf68fa46bba891cf4bf57c5781de8aef7864ddd49f7ec3d29b058f73c41b81 +size 1650939 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..206673b668619c5a65c78226c52e8835404cb90e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5ad31b2e4395fc1c3804341f1461fa005d4fdbdc2d08ccd6a5eed49eeb4a8b +size 8571904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8079f8f6cc581325b60735dc0f256576fc27200a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f03aeb88217270f42f42+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:381b9654322c66b2de85e12a49622f934c293b70dafcb34a5759600f96e669f7 +size 8709155 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f5d031d6cc7473bf839041968a748c5e5634310c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60dfdc9203fbfc155e6bbad4c2c73a58a9d23c13bc691bbb339fa0909e137b98 +size 82456 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8348c7a017a97e3419697fea2f9d7905eed51d5d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f3e8207126f92d912816+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782b273e6a479f7053648d1066d3567bde3960659f87769d0970d0c14be19b60 +size 420864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..562a48d62244f5318573382d92858d6575052418 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dae452cbbc0bcedd9a0638e3e92ffbe6a834a37d4dc0c5a1b6d1eeef9dff206 +size 132002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b21ff231b5bb1e8f99a06a1e9aadaf6f13e4d340 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f55bef5a76fbf9bf2649+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb9af88ee7e90828d3a7471b57b8ab32007993bc34dfadb1660d05dc52c4bba +size 1281024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..1f7e03e762be6e5f685a6c8f08ee8ef5c0b6f76b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..79680e62660f46e42fc77a73218dd58cb60dd025 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f70256b49624663f124791de699598fc947b0f80b8b27c73973aa67e5676ac2 +size 217275 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4137114c227239b457f7f0a7fbf323e63ae7e6c4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f76426f46b3b076a97d3+26ac6be0/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62275216ccf90cd1c50db6e75c6050c8bbb9de33ccce90de8066046261251431 +size 369664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..351d209f0561d87d2ac91522986a3bc10e886f94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4aeec679a69511a8128b7836f1aa52b284c380397507732dcacb0e8d540385 +size 87293 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..203864cade35b10c1fcd74c241aa4e5960548946 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84068611c70eb3c41a2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753350d9678f5d9a3a7a205fed3e4aa3f6a0cb0273310fc36b07edf138c09299 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ab52d0992487033ed4cd137bf1983b1412e9e693 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f189519dd4f7b1b1d0de9f1389bb8ece67b3d757fd26381c835121188439ed +size 197724 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3971444c1294100bcaa3837bc7f9f5d2ecf31a1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6439aef2099c115415211e36e33d0d6788e0b4aa7d2e1f76515a36cd03ca9431 +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c0451d469c1dc05e7bd1059018ecba6bb6940c27 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f84b9ea570c171f5a5fd+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9946ceeb5aea33b029c3424322f5e293fb879db261fabaa2b9c8dfaf78fb445c +size 257393 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b574e35656a698bf7d5f420f08ea7e890ccd7713 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24f6952bed3b510f10199c95e6a92ba54c483288f157c5f127a25187b9b1049 +size 149919 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..703d1abe387edc31d8d23735a29941610a5f12f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f86c96f7ee62c6431f74+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bcb50f983ac85525dce689516f4291a3517a149fb1ad0c9a34c3e01b2900c7d +size 594944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ce333b89b28fcada4e40fe4ce10130524ac5df43 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cec17e088de58ce6e12e9483c087621a1155c2f815579ae5611cb99cb73d399 +size 813739 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..028113c7a0bb4d2fb5671a7b21970f35a7f47f3a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1314db59097a2de7fa54a25229196ad95509e675efbf3f7137eab77c034f4083 +size 3636224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4001eee5fc0c84577424c0603fabfecfad97dba2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fa033bd8b80a68e8d242+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6282352666ad0cb1bd2551376ef5fac09d39aea2a03e8e683ca385fc3c0b79 +size 3767038 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..653d27cfc0e0b84ad5c71b7c47329aec4868a5a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a84d231abb65453464149c97003c0af604eba87418c8ed68da627ce0415ef3 +size 189414 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e76440971f440801e89490095754eb7915242b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8021eb3e7858b2b427370867456aea7a96ead2ea666c9894dbc690e808d04721 +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1c05ec6cc0f07b6a1c9a20ab8bb36ea2db289d98 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fb1bf0457a26706fb5d1+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:049e61fefb7dee9f1f38dd3d0ea2aebe90ae4b942bfafed82954d615b2c06030 +size 278562 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ba2397adeca1fdece799dda609c71ecdee49de2c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9abcce14f0414e1c45e61f4757dd660466ac74ca33bd87b99b4eb958bdacc981 +size 737474 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e6cdee840c74ed6bc79dd6b322afc2432b68d3b0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc12e674baf93fa7e151+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1586289fd962cdcec0d689bb99a71f6dd2c6c7afd8535e64f5eff7ac20443d47 +size 31724544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3c6a516b771d471457e3f6a1699f88b4085fd56 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786287af28926930e2a0d805ad9723239719241353325cb92367adb8d755ee72 +size 221211 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a350394d3f05e986ad33d2a96317d09bec02c0d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdc0c2db2c1b9701dc89f05dc4a403441c5e812c22cfbc8e275c94078a43d056 +size 328704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0c3b7758fcddc046e9dc40be2163a6c498c5ac2a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc2afbd3a188bc24f65d+bfc62e4c/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b70c5fb75426a49d0b2ecc1cc51df0b277a4ff76bfb86572e349a2816fb339bf +size 340002 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..205f5abe85b2c91eff69a7f26b62fa6c55c52b25 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0ad82c036ff952e466d25b7dbdc6e161cc973bacf1b5d53307b6721234cf4b +size 641559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..78e2ff1b7f03cf32526c9069d25d1d3335e63574 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e0c6b1706b368362e6dd65bbfcb2c60adefd26d0f746c243d4ae2f54198d64 +size 10599424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c55f18e0560b131f1af27d42a93c335d51e851e6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe2b7cb58538780206c1+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eba30b7dade65e54eff5ac251da714577a120d81992815ec050e202ba774513e +size 10719778 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e3986c5d95dd9f8c2bf18641de79de51c6e54d8f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a395796e7fb4d2206523acf8855aeda9c769dcf30069f104221315aaf986b7c +size 135622 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12148ecbf3c99455892ec9803d0fa50d99136c1a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fe86d2c41acff0457a14+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150c86c1e993eeeffe85d8e7cb0c93b9e35702b4b6f13a775bc857b2410af352 +size 3984384