| task,metric,value,err,version | |
| anli_r1,acc,0.318,0.014734079309311901,0 | |
| anli_r2,acc,0.365,0.015231776226264902,0 | |
| anli_r3,acc,0.3491666666666667,0.013767075395077252,0 | |
| arc_challenge,acc,0.25341296928327645,0.012710896778378606,0 | |
| arc_challenge,acc_norm,0.2815699658703072,0.013143376735009026,0 | |
| arc_easy,acc,0.5618686868686869,0.010180937100600071,0 | |
| arc_easy,acc_norm,0.5568181818181818,0.010193324837773495,0 | |
| boolq,acc,0.5801223241590214,0.00863204550478174,1 | |
| cb,acc,0.44642857142857145,0.06703189227942398,1 | |
| cb,f1,0.2316017316017316,,1 | |
| copa,acc,0.69,0.04648231987117316,0 | |
| hellaswag,acc,0.3505277833100976,0.00476160130325889,0 | |
| hellaswag,acc_norm,0.4260107548297152,0.004934846809827193,0 | |
| piqa,acc,0.6931447225244831,0.010760295070580368,0 | |
| piqa,acc_norm,0.6877040261153428,0.010812581599154424,0 | |
| rte,acc,0.4620938628158845,0.030009848912529113,0 | |
| sciq,acc,0.882,0.010206869264381795,0 | |
| sciq,acc_norm,0.875,0.010463483381956722,0 | |
| storycloze_2016,acc,0.6226616782469268,0.011209099452196189,0 | |
| winogrande,acc,0.4964483030781373,0.014052131146915864,0 | |