| ,layer_id,layer_type,param_type,shape,nparam,nnz,sparsity | |
| 0,bert.embeddings.word_embeddings,Embedding,weight,"[30522, 768]",23440896,23440896,0.0 | |
| 1,bert.embeddings.position_embeddings,Embedding,weight,"[512, 768]",393216,393216,0.0 | |
| 2,bert.embeddings.token_type_embeddings,Embedding,weight,"[2, 768]",1536,1536,0.0 | |
| 3,bert.embeddings.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 4,bert.embeddings.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 5,bert.encoder.layer.0.attention.self.query,Linear,weight,"[320, 768]",245760,135168,0.44999998807907104 | |
| 6,bert.encoder.layer.0.attention.self.query,Linear,bias,[320],320,256,0.19999998807907104 | |
| 7,bert.encoder.layer.0.attention.self.key,Linear,weight,"[320, 768]",245760,149504,0.3916666507720947 | |
| 8,bert.encoder.layer.0.attention.self.key,Linear,bias,[320],320,256,0.19999998807907104 | |
| 9,bert.encoder.layer.0.attention.self.value,Linear,weight,"[320, 768]",245760,173056,0.2958332896232605 | |
| 10,bert.encoder.layer.0.attention.self.value,Linear,bias,[320],320,256,0.19999998807907104 | |
| 11,bert.encoder.layer.0.attention.output.dense,Linear,weight,"[768, 320]",245760,181248,0.26249998807907104 | |
| 12,bert.encoder.layer.0.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 13,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 14,bert.encoder.layer.0.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 15,bert.encoder.layer.0.intermediate.dense,Linear,weight,"[185, 768]",142080,142080,0.0 | |
| 16,bert.encoder.layer.0.intermediate.dense,Linear,bias,[185],185,185,0.0 | |
| 17,bert.encoder.layer.0.output.dense,Linear,weight,"[768, 185]",142080,142080,0.0 | |
| 18,bert.encoder.layer.0.output.dense,Linear,bias,[768],768,768,0.0 | |
| 19,bert.encoder.layer.0.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 20,bert.encoder.layer.0.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 21,bert.encoder.layer.1.attention.self.query,Linear,weight,"[320, 768]",245760,175104,0.28749996423721313 | |
| 22,bert.encoder.layer.1.attention.self.query,Linear,bias,[320],320,288,0.09999996423721313 | |
| 23,bert.encoder.layer.1.attention.self.key,Linear,weight,"[320, 768]",245760,177152,0.27916663885116577 | |
| 24,bert.encoder.layer.1.attention.self.key,Linear,bias,[320],320,288,0.09999996423721313 | |
| 25,bert.encoder.layer.1.attention.self.value,Linear,weight,"[320, 768]",245760,166912,0.32083332538604736 | |
| 26,bert.encoder.layer.1.attention.self.value,Linear,bias,[320],320,288,0.09999996423721313 | |
| 27,bert.encoder.layer.1.attention.output.dense,Linear,weight,"[768, 320]",245760,167936,0.3166666030883789 | |
| 28,bert.encoder.layer.1.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 29,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 30,bert.encoder.layer.1.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 31,bert.encoder.layer.1.intermediate.dense,Linear,weight,"[315, 768]",241920,241920,0.0 | |
| 32,bert.encoder.layer.1.intermediate.dense,Linear,bias,[315],315,315,0.0 | |
| 33,bert.encoder.layer.1.output.dense,Linear,weight,"[768, 315]",241920,241920,0.0 | |
| 34,bert.encoder.layer.1.output.dense,Linear,bias,[768],768,768,0.0 | |
| 35,bert.encoder.layer.1.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 36,bert.encoder.layer.1.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 37,bert.encoder.layer.2.attention.self.query,Linear,weight,"[576, 768]",442368,285696,0.3541666865348816 | |
| 38,bert.encoder.layer.2.attention.self.query,Linear,bias,[576],576,480,0.1666666865348816 | |
| 39,bert.encoder.layer.2.attention.self.key,Linear,weight,"[576, 768]",442368,297984,0.3263888955116272 | |
| 40,bert.encoder.layer.2.attention.self.key,Linear,bias,[576],576,480,0.1666666865348816 | |
| 41,bert.encoder.layer.2.attention.self.value,Linear,weight,"[576, 768]",442368,226304,0.4884259104728699 | |
| 42,bert.encoder.layer.2.attention.self.value,Linear,bias,[576],576,384,0.3333333134651184 | |
| 43,bert.encoder.layer.2.attention.output.dense,Linear,weight,"[768, 576]",442368,237568,0.4629629850387573 | |
| 44,bert.encoder.layer.2.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 45,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 46,bert.encoder.layer.2.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 47,bert.encoder.layer.2.intermediate.dense,Linear,weight,"[339, 768]",260352,260352,0.0 | |
| 48,bert.encoder.layer.2.intermediate.dense,Linear,bias,[339],339,339,0.0 | |
| 49,bert.encoder.layer.2.output.dense,Linear,weight,"[768, 339]",260352,260352,0.0 | |
| 50,bert.encoder.layer.2.output.dense,Linear,bias,[768],768,768,0.0 | |
| 51,bert.encoder.layer.2.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 52,bert.encoder.layer.2.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 53,bert.encoder.layer.3.attention.self.query,Linear,weight,"[576, 768]",442368,277504,0.37268519401550293 | |
| 54,bert.encoder.layer.3.attention.self.query,Linear,bias,[576],576,512,0.1111111044883728 | |
| 55,bert.encoder.layer.3.attention.self.key,Linear,weight,"[576, 768]",442368,303104,0.31481480598449707 | |
| 56,bert.encoder.layer.3.attention.self.key,Linear,bias,[576],576,512,0.1111111044883728 | |
| 57,bert.encoder.layer.3.attention.self.value,Linear,weight,"[576, 768]",442368,297984,0.3263888955116272 | |
| 58,bert.encoder.layer.3.attention.self.value,Linear,bias,[576],576,512,0.1111111044883728 | |
| 59,bert.encoder.layer.3.attention.output.dense,Linear,weight,"[768, 576]",442368,308224,0.30324071645736694 | |
| 60,bert.encoder.layer.3.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 61,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 62,bert.encoder.layer.3.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 63,bert.encoder.layer.3.intermediate.dense,Linear,weight,"[368, 768]",282624,282624,0.0 | |
| 64,bert.encoder.layer.3.intermediate.dense,Linear,bias,[368],368,368,0.0 | |
| 65,bert.encoder.layer.3.output.dense,Linear,weight,"[768, 368]",282624,282624,0.0 | |
| 66,bert.encoder.layer.3.output.dense,Linear,bias,[768],768,768,0.0 | |
| 67,bert.encoder.layer.3.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 68,bert.encoder.layer.3.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 69,bert.encoder.layer.4.attention.self.query,Linear,weight,"[576, 768]",442368,291840,0.3402777910232544 | |
| 70,bert.encoder.layer.4.attention.self.query,Linear,bias,[576],576,544,0.055555522441864014 | |
| 71,bert.encoder.layer.4.attention.self.key,Linear,weight,"[576, 768]",442368,310272,0.2986111044883728 | |
| 72,bert.encoder.layer.4.attention.self.key,Linear,bias,[576],576,544,0.055555522441864014 | |
| 73,bert.encoder.layer.4.attention.self.value,Linear,weight,"[576, 768]",442368,272384,0.38425928354263306 | |
| 74,bert.encoder.layer.4.attention.self.value,Linear,bias,[576],576,480,0.1666666865348816 | |
| 75,bert.encoder.layer.4.attention.output.dense,Linear,weight,"[768, 576]",442368,263168,0.40509259700775146 | |
| 76,bert.encoder.layer.4.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 77,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 78,bert.encoder.layer.4.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 79,bert.encoder.layer.4.intermediate.dense,Linear,weight,"[386, 768]",296448,296448,0.0 | |
| 80,bert.encoder.layer.4.intermediate.dense,Linear,bias,[386],386,386,0.0 | |
| 81,bert.encoder.layer.4.output.dense,Linear,weight,"[768, 386]",296448,296448,0.0 | |
| 82,bert.encoder.layer.4.output.dense,Linear,bias,[768],768,768,0.0 | |
| 83,bert.encoder.layer.4.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 84,bert.encoder.layer.4.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 85,bert.encoder.layer.5.attention.self.query,Linear,weight,"[384, 768]",294912,171008,0.4201388955116272 | |
| 86,bert.encoder.layer.5.attention.self.query,Linear,bias,[384],384,352,0.08333331346511841 | |
| 87,bert.encoder.layer.5.attention.self.key,Linear,weight,"[384, 768]",294912,205824,0.3020833134651184 | |
| 88,bert.encoder.layer.5.attention.self.key,Linear,bias,[384],384,352,0.08333331346511841 | |
| 89,bert.encoder.layer.5.attention.self.value,Linear,weight,"[384, 768]",294912,217088,0.2638888955116272 | |
| 90,bert.encoder.layer.5.attention.self.value,Linear,bias,[384],384,384,0.0 | |
| 91,bert.encoder.layer.5.attention.output.dense,Linear,weight,"[768, 384]",294912,223232,0.243055522441864 | |
| 92,bert.encoder.layer.5.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 93,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 94,bert.encoder.layer.5.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 95,bert.encoder.layer.5.intermediate.dense,Linear,weight,"[336, 768]",258048,258048,0.0 | |
| 96,bert.encoder.layer.5.intermediate.dense,Linear,bias,[336],336,336,0.0 | |
| 97,bert.encoder.layer.5.output.dense,Linear,weight,"[768, 336]",258048,258048,0.0 | |
| 98,bert.encoder.layer.5.output.dense,Linear,bias,[768],768,768,0.0 | |
| 99,bert.encoder.layer.5.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 100,bert.encoder.layer.5.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 101,bert.encoder.layer.6.attention.self.query,Linear,weight,"[448, 768]",344064,192512,0.4404761791229248 | |
| 102,bert.encoder.layer.6.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441 | |
| 103,bert.encoder.layer.6.attention.self.key,Linear,weight,"[448, 768]",344064,224256,0.3482142686843872 | |
| 104,bert.encoder.layer.6.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441 | |
| 105,bert.encoder.layer.6.attention.self.value,Linear,weight,"[448, 768]",344064,209920,0.3898809552192688 | |
| 106,bert.encoder.layer.6.attention.self.value,Linear,bias,[448],448,352,0.21428567171096802 | |
| 107,bert.encoder.layer.6.attention.output.dense,Linear,weight,"[768, 448]",344064,199680,0.4196428656578064 | |
| 108,bert.encoder.layer.6.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 109,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 110,bert.encoder.layer.6.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 111,bert.encoder.layer.6.intermediate.dense,Linear,weight,"[280, 768]",215040,215040,0.0 | |
| 112,bert.encoder.layer.6.intermediate.dense,Linear,bias,[280],280,280,0.0 | |
| 113,bert.encoder.layer.6.output.dense,Linear,weight,"[768, 280]",215040,215040,0.0 | |
| 114,bert.encoder.layer.6.output.dense,Linear,bias,[768],768,768,0.0 | |
| 115,bert.encoder.layer.6.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 116,bert.encoder.layer.6.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 117,bert.encoder.layer.7.attention.self.query,Linear,weight,"[448, 768]",344064,201728,0.413690447807312 | |
| 118,bert.encoder.layer.7.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441 | |
| 119,bert.encoder.layer.7.attention.self.key,Linear,weight,"[448, 768]",344064,237568,0.3095238208770752 | |
| 120,bert.encoder.layer.7.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441 | |
| 121,bert.encoder.layer.7.attention.self.value,Linear,weight,"[448, 768]",344064,218112,0.3660714030265808 | |
| 122,bert.encoder.layer.7.attention.self.value,Linear,bias,[448],448,352,0.21428567171096802 | |
| 123,bert.encoder.layer.7.attention.output.dense,Linear,weight,"[768, 448]",344064,202752,0.4107142686843872 | |
| 124,bert.encoder.layer.7.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 125,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 126,bert.encoder.layer.7.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 127,bert.encoder.layer.7.intermediate.dense,Linear,weight,"[211, 768]",162048,162048,0.0 | |
| 128,bert.encoder.layer.7.intermediate.dense,Linear,bias,[211],211,211,0.0 | |
| 129,bert.encoder.layer.7.output.dense,Linear,weight,"[768, 211]",162048,162048,0.0 | |
| 130,bert.encoder.layer.7.output.dense,Linear,bias,[768],768,768,0.0 | |
| 131,bert.encoder.layer.7.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 132,bert.encoder.layer.7.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 133,bert.encoder.layer.8.attention.self.query,Linear,weight,"[448, 768]",344064,186368,0.4583333134651184 | |
| 134,bert.encoder.layer.8.attention.self.query,Linear,bias,[448],448,416,0.07142853736877441 | |
| 135,bert.encoder.layer.8.attention.self.key,Linear,weight,"[448, 768]",344064,197632,0.425595223903656 | |
| 136,bert.encoder.layer.8.attention.self.key,Linear,bias,[448],448,416,0.07142853736877441 | |
| 137,bert.encoder.layer.8.attention.self.value,Linear,weight,"[448, 768]",344064,154624,0.550595223903656 | |
| 138,bert.encoder.layer.8.attention.self.value,Linear,bias,[448],448,288,0.3571428060531616 | |
| 139,bert.encoder.layer.8.attention.output.dense,Linear,weight,"[768, 448]",344064,148480,0.5684523582458496 | |
| 140,bert.encoder.layer.8.attention.output.dense,Linear,bias,[768],768,768,0.0 | |
| 141,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 142,bert.encoder.layer.8.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 143,bert.encoder.layer.8.intermediate.dense,Linear,weight,"[108, 768]",82944,82944,0.0 | |
| 144,bert.encoder.layer.8.intermediate.dense,Linear,bias,[108],108,108,0.0 | |
| 145,bert.encoder.layer.8.output.dense,Linear,weight,"[768, 108]",82944,82944,0.0 | |
| 146,bert.encoder.layer.8.output.dense,Linear,bias,[768],768,768,0.0 | |
| 147,bert.encoder.layer.8.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 148,bert.encoder.layer.8.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 149,bert.encoder.layer.9.attention.self.query,Linear,weight,"[320, 768]",245760,144384,0.41249996423721313 | |
| 150,bert.encoder.layer.9.attention.self.query,Linear,bias,[320],320,288,0.09999996423721313 | |
| 151,bert.encoder.layer.9.attention.self.key,Linear,weight,"[320, 768]",245760,155648,0.36666661500930786 | |
| 152,bert.encoder.layer.9.attention.self.key,Linear,bias,[320],320,288,0.09999996423721313 | |
| 153,bert.encoder.layer.9.attention.self.value,Linear,weight,"[320, 768]",245760,63488,0.7416666746139526 | |
| 154,bert.encoder.layer.9.attention.self.value,Linear,bias,[320],320,160,0.5 | |
| 155,bert.encoder.layer.9.attention.output.dense,Linear,weight,"[768, 320]",245760,65536,0.7333333492279053 | |
| 156,bert.encoder.layer.9.attention.output.dense,Linear,bias,[768],768,704,0.08333331346511841 | |
| 157,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 158,bert.encoder.layer.9.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 159,bert.encoder.layer.9.intermediate.dense,Linear,weight,"[53, 768]",40704,40704,5.960464477539063e-08 | |
| 160,bert.encoder.layer.9.intermediate.dense,Linear,bias,[53],53,53,0.0 | |
| 161,bert.encoder.layer.9.output.dense,Linear,weight,"[768, 53]",40704,40704,5.960464477539063e-08 | |
| 162,bert.encoder.layer.9.output.dense,Linear,bias,[768],768,768,0.0 | |
| 163,bert.encoder.layer.9.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 164,bert.encoder.layer.9.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 165,bert.encoder.layer.10.attention.self.query,Linear,weight,"[384, 768]",294912,158720,0.461805522441864 | |
| 166,bert.encoder.layer.10.attention.self.query,Linear,bias,[384],384,320,0.16666662693023682 | |
| 167,bert.encoder.layer.10.attention.self.key,Linear,weight,"[384, 768]",294912,158720,0.461805522441864 | |
| 168,bert.encoder.layer.10.attention.self.key,Linear,bias,[384],384,320,0.16666662693023682 | |
| 169,bert.encoder.layer.10.attention.self.value,Linear,weight,"[384, 768]",294912,77824,0.7361111044883728 | |
| 170,bert.encoder.layer.10.attention.self.value,Linear,bias,[384],384,192,0.5 | |
| 171,bert.encoder.layer.10.attention.output.dense,Linear,weight,"[768, 384]",294912,78848,0.7326388955116272 | |
| 172,bert.encoder.layer.10.attention.output.dense,Linear,bias,[768],768,736,0.041666626930236816 | |
| 173,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 174,bert.encoder.layer.10.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 175,bert.encoder.layer.10.intermediate.dense,Linear,weight,"[86, 768]",66048,66048,0.0 | |
| 176,bert.encoder.layer.10.intermediate.dense,Linear,bias,[86],86,86,0.0 | |
| 177,bert.encoder.layer.10.output.dense,Linear,weight,"[768, 86]",66048,66048,0.0 | |
| 178,bert.encoder.layer.10.output.dense,Linear,bias,[768],768,768,0.0 | |
| 179,bert.encoder.layer.10.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 180,bert.encoder.layer.10.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 181,bert.encoder.layer.11.attention.self.query,Linear,weight,"[384, 768]",294912,107520,0.6354166269302368 | |
| 182,bert.encoder.layer.11.attention.self.query,Linear,bias,[384],384,256,0.3333333134651184 | |
| 183,bert.encoder.layer.11.attention.self.key,Linear,weight,"[384, 768]",294912,118784,0.5972222089767456 | |
| 184,bert.encoder.layer.11.attention.self.key,Linear,bias,[384],384,256,0.3333333134651184 | |
| 185,bert.encoder.layer.11.attention.self.value,Linear,weight,"[384, 768]",294912,62464,0.7881944179534912 | |
| 186,bert.encoder.layer.11.attention.self.value,Linear,bias,[384],384,192,0.5 | |
| 187,bert.encoder.layer.11.attention.output.dense,Linear,weight,"[768, 384]",294912,54272,0.8159722089767456 | |
| 188,bert.encoder.layer.11.attention.output.dense,Linear,bias,[768],768,672,0.125 | |
| 189,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 190,bert.encoder.layer.11.attention.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 191,bert.encoder.layer.11.intermediate.dense,Linear,weight,"[105, 768]",80640,80640,0.0 | |
| 192,bert.encoder.layer.11.intermediate.dense,Linear,bias,[105],105,105,0.0 | |
| 193,bert.encoder.layer.11.output.dense,Linear,weight,"[768, 105]",80640,80640,0.0 | |
| 194,bert.encoder.layer.11.output.dense,Linear,bias,[768],768,768,0.0 | |
| 195,bert.encoder.layer.11.output.LayerNorm,LayerNorm,weight,[768],768,768,0.0 | |
| 196,bert.encoder.layer.11.output.LayerNorm,LayerNorm,bias,[768],768,768,0.0 | |
| 197,qa_outputs,Linear,weight,"[2, 768]",1536,1536,0.0 | |
| 198,qa_outputs,Linear,bias,[2],2,2,0.0 | |