|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811485,0
|
|
anli_r2,acc,0.339,0.014976758771620344,0
|
|
anli_r3,acc,0.3358333333333333,0.013639261190932882,0
|
|
arc_challenge,acc,0.28668941979522183,0.013214986329274777,0
|
|
arc_challenge,acc_norm,0.3122866894197952,0.013542598541688065,0
|
|
arc_easy,acc,0.6123737373737373,0.009997307914447614,0
|
|
arc_easy,acc_norm,0.5669191919191919,0.010167478013701796,0
|
|
boolq,acc,0.5522935779816514,0.008697094687974059,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.255050505050505,,1
|
|
copa,acc,0.75,0.04351941398892446,0
|
|
hellaswag,acc,0.46395140410276836,0.004976796060456438,0
|
|
hellaswag,acc_norm,0.6121290579565823,0.004862690594815717,0
|
|
piqa,acc,0.7377584330794341,0.010262502565172449,0
|
|
piqa,acc_norm,0.7383025027203483,0.01025563077270823,0
|
|
rte,acc,0.5306859205776173,0.030039730592197812,0
|
|
sciq,acc,0.886,0.01005510343582333,0
|
|
sciq,acc_norm,0.863,0.01087884871433332,0
|
|
storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0
|
|
winogrande,acc,0.5840568271507498,0.013852485356798266,0
|
|
|