| task,metric,value,err,version | |
| anli_r1,acc,0.317,0.014721675438880227,0 | |
| anli_r2,acc,0.346,0.015050266127564448,0 | |
| anli_r3,acc,0.3283333333333333,0.013562032919529012,0 | |
| arc_challenge,acc,0.27047781569965873,0.012980954547659554,0 | |
| arc_challenge,acc_norm,0.2883959044368601,0.013238394422428175,0 | |
| arc_easy,acc,0.5934343434343434,0.01007905641922352,0 | |
| arc_easy,acc_norm,0.5715488215488216,0.01015419573399097,0 | |
| boolq,acc,0.5773700305810398,0.008639722698719019,1 | |
| cb,acc,0.35714285714285715,0.0646095738380922,1 | |
| cb,f1,0.2557471264367816,,1 | |
| copa,acc,0.73,0.044619604333847394,0 | |
| hellaswag,acc,0.45030870344552876,0.004965078477435579,0 | |
| hellaswag,acc_norm,0.599183429595698,0.004890623693243619,0 | |
| piqa,acc,0.7524483133841132,0.010069703966857106,0 | |
| piqa,acc_norm,0.750272034820457,0.010099232969867469,0 | |
| rte,acc,0.5342960288808665,0.030025579819366426,0 | |
| sciq,acc,0.881,0.010244215145336664,0 | |
| sciq,acc_norm,0.87,0.010640169792499349,0 | |
| storycloze_2016,acc,0.6996258685195083,0.010600915927985021,0 | |
| winogrande,acc,0.5611681136543015,0.013946933444507032,0 | |