task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811485,0 anli_r2,acc,0.339,0.014976758771620344,0 anli_r3,acc,0.3358333333333333,0.013639261190932882,0 arc_challenge,acc,0.28668941979522183,0.013214986329274777,0 arc_challenge,acc_norm,0.3122866894197952,0.013542598541688065,0 arc_easy,acc,0.6123737373737373,0.009997307914447614,0 arc_easy,acc_norm,0.5669191919191919,0.010167478013701796,0 boolq,acc,0.5522935779816514,0.008697094687974059,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.255050505050505,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.46395140410276836,0.004976796060456438,0 hellaswag,acc_norm,0.6121290579565823,0.004862690594815717,0 piqa,acc,0.7377584330794341,0.010262502565172449,0 piqa,acc_norm,0.7383025027203483,0.01025563077270823,0 rte,acc,0.5306859205776173,0.030039730592197812,0 sciq,acc,0.886,0.01005510343582333,0 sciq,acc_norm,0.863,0.01087884871433332,0 storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0 winogrande,acc,0.5840568271507498,0.013852485356798266,0