craffel's picture
craffel HF Staff
Upload mistralai-Mistral-7B-Instruct-v0.3/metrics.eval.jsonl with huggingface_hub
73a013d verified
{"created_at": "2025-08-23T11:19:48.422546", "global_step": 2000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19965870307167236, "acc_stderr,none": 0.011681625756888673, "acc_norm,none": 0.24146757679180889, "acc_norm_stderr,none": 0.01250656483973943}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4225589225589226, "acc_stderr,none": 0.010135978222981071, "acc_norm,none": 0.38930976430976433, "acc_norm_stderr,none": 0.01000521278287814}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2688888888888889, "acc_stderr,none": 0.01478761974756761, "acc_norm,none": 0.2688888888888889, "acc_norm_stderr,none": 0.01478761974756761}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2811111111111111, "acc_stderr,none": 0.014993051943690201, "acc_norm,none": 0.2811111111111111, "acc_norm_stderr,none": 0.014993051943690201}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.26555555555555554, "acc_stderr,none": 0.014729137787585108, "acc_norm,none": 0.26555555555555554, "acc_norm_stderr,none": 0.014729137787585108}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642524, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642524}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.27833100975901215, "acc_stderr,none": 0.004472613148508901, "acc_norm,none": 0.28908583947420835, "acc_norm_stderr,none": 0.004524113671259686}, "include_base_44_chinese": {"acc,none": 0.25688073394495414, "acc_stderr,none": 0.018757678755115702, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018723152557077877, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168703}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.034967874881680024}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019433171425292186, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3072289156626506, "acc_stderr,none": 0.03591566797824664}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.06767268161329719}, "piqa": {"alias": "piqa", "acc,none": 0.5914036996735582, "acc_stderr,none": 0.01146924038724514, "acc_norm,none": 0.5778019586507073, "acc_norm_stderr,none": 0.011523728926777593}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.38473895582329315, "acc_stderr,none": 0.00975214930715253}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.00950428807888022}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}}
{"created_at": "2025-08-23T15:46:12.375275", "global_step": 4000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2090443686006826, "acc_stderr,none": 0.011882746987406448, "acc_norm,none": 0.2551194539249147, "acc_norm_stderr,none": 0.012739038695202098}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.4877946127946128, "acc_stderr,none": 0.010256726235129014, "acc_norm,none": 0.4225589225589226, "acc_norm_stderr,none": 0.010135978222981071}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198714, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198714}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.01442032345164253, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.01442032345164253}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651676, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651676}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356818, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356818}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2921728739294961, "acc_stderr,none": 0.0045383194641119605, "acc_norm,none": 0.32224656442939653, "acc_norm_stderr,none": 0.004663817291468728}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018392287804737003, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214189}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261422782795318, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428358}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245767}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_turkish": {"acc,none": 0.2791970802919708, "acc_stderr,none": 0.01920619950288358, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233136}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.06767268161329719}, "piqa": {"alias": "piqa", "acc,none": 0.6115342763873776, "acc_stderr,none": 0.01137187759321025, "acc_norm,none": 0.6115342763873776, "acc_norm_stderr,none": 0.011371877593210256}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.43012048192771085, "acc_stderr,none": 0.009923711675408061}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3477911646586345, "acc_stderr,none": 0.009546411769843142}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}}
{"created_at": "2025-08-23T21:30:39.447668", "global_step": 6000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2226962457337884, "acc_stderr,none": 0.012158314774829931, "acc_norm,none": 0.25341296928327645, "acc_norm_stderr,none": 0.012710896778378602}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5155723905723906, "acc_stderr,none": 0.010254806331961887, "acc_norm,none": 0.45286195286195285, "acc_norm_stderr,none": 0.010214087372211392}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579392, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579392}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581951, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581951}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241896, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241896}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651667, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651667}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.28444444444444444, "acc_stderr,none": 0.015046675709606041, "acc_norm,none": 0.28444444444444444, "acc_norm_stderr,none": 0.015046675709606041}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.30342561242780325, "acc_stderr,none": 0.00458797862558248, "acc_norm,none": 0.3437562238597889, "acc_norm_stderr,none": 0.004739902411944551}, "include_base_44_chinese": {"acc,none": 0.25871559633027524, "acc_stderr,none": 0.018792356250531085, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3103448275862069, "acc_stderr,none": 0.04988718850038745}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018517972623312545, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.07961491954505552}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2709677419354839, "acc_stderr,none": 0.03581556513964114}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.03253989433108519}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.0186504170682999, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511114}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.6235038084874864, "acc_stderr,none": 0.01130434155012673, "acc_norm,none": 0.6338411316648531, "acc_norm_stderr,none": 0.011240106070308457}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.45903614457831327, "acc_stderr,none": 0.009988381409296447}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.40441767068273093, "acc_stderr,none": 0.009837245625453005}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}}
{"created_at": "2025-08-24T02:58:50.467532", "global_step": 8000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.22866894197952217, "acc_stderr,none": 0.012272853582540802, "acc_norm,none": 0.26621160409556316, "acc_norm_stderr,none": 0.012915774781523203}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5311447811447811, "acc_stderr,none": 0.010239860250021745, "acc_norm,none": 0.4650673400673401, "acc_norm_stderr,none": 0.01023471305272367}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859547, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859547}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884507, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884507}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380032, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380032}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632421, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632421}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632423, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632423}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.31208922525393346, "acc_stderr,none": 0.004623990785158488, "acc_norm,none": 0.3529177454690301, "acc_norm_stderr,none": 0.004769007545082273}, "include_base_44_chinese": {"acc,none": 0.25871559633027524, "acc_stderr,none": 0.018680215781279947, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.042756781109738705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383252}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3448275862068966, "acc_stderr,none": 0.05125421389342353}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909663}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018742906982648452, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.035340161390504686}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964298}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_turkish": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.01873614062006723, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073528}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.3072289156626506, "acc_stderr,none": 0.03591566797824664}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.061011875725893214}, "piqa": {"alias": "piqa", "acc,none": 0.6360174102285092, "acc_stderr,none": 0.011225875703487171, "acc_norm,none": 0.6175190424374319, "acc_norm_stderr,none": 0.011339019654272343}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4634538152610442, "acc_stderr,none": 0.009995265580368909}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.40401606425702813, "acc_stderr,none": 0.009835674445385838}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}}
{"created_at": "2025-08-24T08:35:57.441383", "global_step": 10000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.21928327645051193, "acc_stderr,none": 0.012091245787615734, "acc_norm,none": 0.2687713310580205, "acc_norm_stderr,none": 0.012955065963710682}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5454545454545454, "acc_stderr,none": 0.010217299762709423, "acc_norm,none": 0.4810606060606061, "acc_norm_stderr,none": 0.010252420496894494}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.014649486385262138, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.014649486385262138}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2833333333333333, "acc_stderr,none": 0.015028913755536904, "acc_norm,none": 0.2833333333333333, "acc_norm_stderr,none": 0.015028913755536904}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.01435518086534295, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.01435518086534295}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.014689553047342534, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.014689553047342534}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3165704043019319, "acc_stderr,none": 0.004641876299335619, "acc_norm,none": 0.37004580760804623, "acc_norm_stderr,none": 0.004818298991012553}, "include_base_44_chinese": {"acc,none": 0.25137614678899084, "acc_stderr,none": 0.018533981304490077, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.3103448275862069, "acc_stderr,none": 0.049887188500387446}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3448275862068966, "acc_stderr,none": 0.05125421389342353}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.01863332796005509, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700356}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.03340463153945588}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.03467377173717454}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_turkish": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017828352028525073, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663925}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256418}, "piqa": {"alias": "piqa", "acc,none": 0.6517954298150164, "acc_stderr,none": 0.0111152263432444, "acc_norm,none": 0.6392818280739935, "acc_norm_stderr,none": 0.01120406480908852}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4570281124497992, "acc_stderr,none": 0.009984991084561275}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.42449799196787147, "acc_stderr,none": 0.009907151253284268}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201524}}
{"created_at": "2025-08-24T19:53:19.933497", "global_step": 14000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.24658703071672355, "acc_stderr,none": 0.012595726268790122, "acc_norm,none": 0.28668941979522183, "acc_norm_stderr,none": 0.013214986329274786}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5660774410774411, "acc_stderr,none": 0.010169795770462108, "acc_norm,none": 0.5054713804713805, "acc_norm_stderr,none": 0.010259169228615044}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458125, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458125}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132616, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132616}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699814, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699814}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.01437702337540938, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.01437702337540938}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342964, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342964}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.32792272455686117, "acc_stderr,none": 0.004684970696902951, "acc_norm,none": 0.39026090420235016, "acc_norm_stderr,none": 0.00486811759848194}, "include_base_44_chinese": {"acc,none": 0.26605504587155965, "acc_stderr,none": 0.01898247919860598, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383252}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_italian": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.017998307813348293, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.17964071856287425, "acc_stderr,none": 0.029795480654829835}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1935483870967742, "acc_stderr,none": 0.03183637233676304}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.31137724550898205, "acc_stderr,none": 0.03594016584565771}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24635036496350365, "acc_stderr,none": 0.01845085448158941, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589322}, "piqa": {"alias": "piqa", "acc,none": 0.6523394994559304, "acc_stderr,none": 0.01111117366139373, "acc_norm,none": 0.6485310119695321, "acc_norm_stderr,none": 0.011139207691931191}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4746987951807229, "acc_stderr,none": 0.010009233363499405}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.39518072289156625, "acc_stderr,none": 0.00979937189274674}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3381526104417671, "acc_stderr,none": 0.009482500057981031}}
{"created_at": "2025-08-25T01:32:21.125390", "global_step": 16000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.23976109215017063, "acc_stderr,none": 0.012476304127453954, "acc_norm,none": 0.2713310580204778, "acc_norm_stderr,none": 0.012993807727545792}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.569023569023569, "acc_stderr,none": 0.010161552863493751, "acc_norm,none": 0.49957912457912457, "acc_norm_stderr,none": 0.01025977988609442}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2111111111111111, "acc_stderr,none": 0.013610798969328474, "acc_norm,none": 0.2111111111111111, "acc_norm_stderr,none": 0.013610798969328474}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859537, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859537}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.20777777777777778, "acc_stderr,none": 0.013531414972025832, "acc_norm,none": 0.20777777777777778, "acc_norm_stderr,none": 0.013531414972025832}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.21333333333333335, "acc_stderr,none": 0.013662962863011163, "acc_norm,none": 0.21333333333333335, "acc_norm_stderr,none": 0.013662962863011163}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2088888888888889, "acc_stderr,none": 0.01355802923832221, "acc_norm,none": 0.2088888888888889, "acc_norm_stderr,none": 0.01355802923832221}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.33270264887472617, "acc_stderr,none": 0.004702181042215885, "acc_norm,none": 0.3986257717586138, "acc_norm_stderr,none": 0.004886147907627404}, "include_base_44_chinese": {"acc,none": 0.27155963302752295, "acc_stderr,none": 0.019112347221182876, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923643}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018663387039756785, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.281437125748503, "acc_stderr,none": 0.03490350467428358}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018498434611058014, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560372}, "piqa": {"alias": "piqa", "acc,none": 0.6610446137105549, "acc_stderr,none": 0.01104414441971064, "acc_norm,none": 0.6534276387377584, "acc_norm_stderr,none": 0.01110302032087217}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.491566265060241, "acc_stderr,none": 0.010020647068114173}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43694779116465865, "acc_stderr,none": 0.009942066394610849}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512704}}
{"created_at": "2025-08-25T07:13:08.178914", "global_step": 18000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.24914675767918087, "acc_stderr,none": 0.012639407111926437, "acc_norm,none": 0.2713310580204778, "acc_norm_stderr,none": 0.012993807727545789}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5795454545454546, "acc_stderr,none": 0.010129114278546528, "acc_norm,none": 0.49242424242424243, "acc_norm_stderr,none": 0.01025860579215332}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979265, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979265}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055189, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055189}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.33588926508663614, "acc_stderr,none": 0.00471335150088514, "acc_norm,none": 0.40689105755825533, "acc_norm_stderr,none": 0.004902502514738603}, "include_base_44_chinese": {"acc,none": 0.26422018348623855, "acc_stderr,none": 0.01893216956087259, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05083285677753486}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.018274056541611797, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.033404631539455894}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2645161290322581, "acc_stderr,none": 0.03554285382300394}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.22992700729927007, "acc_stderr,none": 0.018008902675098033, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.052372293656638154}, "piqa": {"alias": "piqa", "acc,none": 0.6702937976060935, "acc_stderr,none": 0.010968357083095152, "acc_norm,none": 0.6507072905331882, "acc_norm_stderr,none": 0.011123283817525077}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5108433734939759, "acc_stderr,none": 0.01001971582448348}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43132530120481927, "acc_stderr,none": 0.009927090290379253}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
{"created_at": "2025-08-25T12:50:56.173567", "global_step": 20000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.25, "acc_stderr,none": 0.012653835621466646, "acc_norm,none": 0.27986348122866894, "acc_norm_stderr,none": 0.013119040897725925}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5656565656565656, "acc_stderr,none": 0.010170943451269423, "acc_norm,none": 0.4823232323232323, "acc_norm_stderr,none": 0.010253369805698968}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409378, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409378}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380039, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380039}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308185, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308185}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.01442032345164253, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.01442032345164253}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.34186417048396733, "acc_stderr,none": 0.004733649274814516, "acc_norm,none": 0.41326428998207526, "acc_norm_stderr,none": 0.0049141308554317776}, "include_base_44_chinese": {"acc,none": 0.27522935779816515, "acc_stderr,none": 0.01915540149827571, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.04819560289115228}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923643}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.4375, "acc_stderr,none": 0.128086884574495}, "include_base_44_italian": {"acc,none": 0.2116788321167883, "acc_stderr,none": 0.017509930568308, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1870967741935484, "acc_stderr,none": 0.031426224808489875}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964297}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018434747917551675, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689631}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663815}, "piqa": {"alias": "piqa", "acc,none": 0.6534276387377584, "acc_stderr,none": 0.01110302032087218, "acc_norm,none": 0.6550598476605005, "acc_norm_stderr,none": 0.011090670102993158}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4879518072289157, "acc_stderr,none": 0.010019162857624485}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.41566265060240964, "acc_stderr,none": 0.009878474341822924}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.009504288078880216}}
{"created_at": "2025-08-25T18:30:56.103157", "global_step": 22000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.25597269624573377, "acc_stderr,none": 0.012753013241244521, "acc_norm,none": 0.28498293515358364, "acc_norm_stderr,none": 0.013191348179838793}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5959595959595959, "acc_stderr,none": 0.010069061649549545, "acc_norm,none": 0.5214646464646465, "acc_norm_stderr,none": 0.01025032515945666}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.014709405413413137, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.014709405413413137}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884514, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884514}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632407, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632407}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651672, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651672}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.34475204142601074, "acc_stderr,none": 0.004743160034271151, "acc_norm,none": 0.4231228838876718, "acc_norm_stderr,none": 0.0049304485271466575}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018342006741239283, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.36619718309859156, "acc_stderr,none": 0.05758184314388001}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.4375, "acc_stderr,none": 0.128086884574495}, "include_base_44_italian": {"acc,none": 0.25, "acc_stderr,none": 0.018490258974505073, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.07961491954505552}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.036577207065409116}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_turkish": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.018986392446404408, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.6653971708378672, "acc_stderr,none": 0.0110090717251625, "acc_norm,none": 0.661588683351469, "acc_norm_stderr,none": 0.011039817512986832}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4807228915662651, "acc_stderr,none": 0.010014621554188637}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.40321285140562246, "acc_stderr,none": 0.009832511560868066}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.009517658993060703}}
{"created_at": "2025-08-26T12:28:16.821909", "global_step": 24000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.24914675767918087, "acc_stderr,none": 0.012639407111926439, "acc_norm,none": 0.28668941979522183, "acc_norm_stderr,none": 0.013214986329274776}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5845959595959596, "acc_stderr,none": 0.01011186949491152, "acc_norm,none": 0.5189393939393939, "acc_norm_stderr,none": 0.010252420496894494}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132646, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132646}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687956, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687956}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409392, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409392}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.34495120493925513, "acc_stderr,none": 0.004743808792037857, "acc_norm,none": 0.4222266480780721, "acc_norm_stderr,none": 0.004929048482760453}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.018549511731901367, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2709677419354839, "acc_stderr,none": 0.03581556513964113}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.28284671532846717, "acc_stderr,none": 0.019228786058337794, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.661588683351469, "acc_stderr,none": 0.011039817512986832, "acc_norm,none": 0.6626768226332971, "acc_norm_stderr,none": 0.011031114785059698}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4955823293172691, "acc_stderr,none": 0.010021681681769354}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.42088353413654617, "acc_stderr,none": 0.009895812914052199}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337343}}
{"created_at": "2025-08-26T12:31:23.359938", "global_step": 26000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2815699658703072, "acc_stderr,none": 0.01314337673500902, "acc_norm,none": 0.3054607508532423, "acc_norm_stderr,none": 0.013460080478002501}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5913299663299664, "acc_stderr,none": 0.010087174498762886, "acc_norm,none": 0.5378787878787878, "acc_norm_stderr,none": 0.0102302996288648}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687961, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687961}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308197, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308197}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368797, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368797}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3504282015534754, "acc_stderr,none": 0.004761289867046065, "acc_norm,none": 0.4237203744274049, "acc_norm_stderr,none": 0.004931372657129786}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018631817842873294, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050468}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.01923406084306874, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.03696584317010601}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.06767268161329719}, "piqa": {"alias": "piqa", "acc,none": 0.6681175190424374, "acc_stderr,none": 0.01098661777636159, "acc_norm,none": 0.6605005440696409, "acc_norm_stderr,none": 0.011048455047173918}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4831325301204819, "acc_stderr,none": 0.010016368453021547}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.41285140562248995, "acc_stderr,none": 0.009868665943084408}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3481927710843373, "acc_stderr,none": 0.009548980649153386}}
{"created_at": "2025-08-26T12:35:25.531811", "global_step": 28000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.26706484641638223, "acc_stderr,none": 0.012928933196496352, "acc_norm,none": 0.30204778156996587, "acc_norm_stderr,none": 0.013417519144716417}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5921717171717171, "acc_stderr,none": 0.010083950240041214, "acc_norm,none": 0.531986531986532, "acc_norm_stderr,none": 0.010238767643185721}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254297, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254297}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321818, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321818}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683066, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683066}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241892, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241892}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.35012945628360886, "acc_stderr,none": 0.004760354191370857, "acc_norm,none": 0.42929695279824737, "acc_norm_stderr,none": 0.00493964246017258}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018508404479228944, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.323943661971831, "acc_stderr,none": 0.055934166129236414}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_italian": {"acc,none": 0.25, "acc_stderr,none": 0.01828647856146751, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.32934131736526945, "acc_stderr,none": 0.03647706112341117}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.0312532196228334}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.0, "acc_stderr,none": 0.0}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.0193139932475483, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.0654653670707977}, "piqa": {"alias": "piqa", "acc,none": 0.6632208922742111, "acc_stderr,none": 0.01102673892525118, "acc_norm,none": 0.6659412404787813, "acc_norm_stderr,none": 0.011004613886336745}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4907630522088353, "acc_stderr,none": 0.01002036253063136}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.44859437751004017, "acc_stderr,none": 0.009968964736894261}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274445}}
{"created_at": "2025-08-26T17:06:55.894936", "global_step": 30000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2687713310580205, "acc_stderr,none": 0.012955065963710679, "acc_norm,none": 0.2960750853242321, "acc_norm_stderr,none": 0.01334091608524626}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.5824915824915825, "acc_stderr,none": 0.01011918737777603, "acc_norm,none": 0.5168350168350169, "acc_norm_stderr,none": 0.010253966261288897}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683041, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683041}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391944, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391944}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168491, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168491}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632428, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632428}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3511252738498307, "acc_stderr,none": 0.0047634651390385556, "acc_norm,none": 0.43397729535949015, "acc_norm_stderr,none": 0.0049460892301530215}, "include_base_44_chinese": {"acc,none": 0.25321100917431194, "acc_stderr,none": 0.01862937195537458, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.042756781109738705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018723219090368018, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050468}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018467866146565207, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.6730141458106638, "acc_stderr,none": 0.01094515712697823, "acc_norm,none": 0.6632208922742111, "acc_norm_stderr,none": 0.01102673892525118}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4771084337349398, "acc_stderr,none": 0.010011563747774335}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.43654618473895584, "acc_stderr,none": 0.00994103979113313}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201524}}
{"created_at": "2025-08-27T10:16:55.900922", "global_step": 32000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.28498293515358364, "acc_stderr,none": 0.013191348179838793, "acc_norm,none": 0.30887372013651876, "acc_norm_stderr,none": 0.013501770929344003}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6064814814814815, "acc_stderr,none": 0.010024426884292567, "acc_norm,none": 0.5441919191919192, "acc_norm_stderr,none": 0.010219631763437851}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979265, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979265}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276217, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276217}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.35919139613622786, "acc_stderr,none": 0.0047878291682556555, "acc_norm,none": 0.4386576379207329, "acc_norm_stderr,none": 0.004952087083128892}, "include_base_44_chinese": {"acc,none": 0.23486238532110093, "acc_stderr,none": 0.018216579881465574, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.044792908199096614}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018630450973885777, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.03633254072705441}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.019026793414974364, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.03571609230053481}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "piqa": {"alias": "piqa", "acc,none": 0.6692056583242655, "acc_stderr,none": 0.010977520584714429, "acc_norm,none": 0.6692056583242655, "acc_norm_stderr,none": 0.010977520584714434}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4863453815261044, "acc_stderr,none": 0.01001833496714856}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4433734939759036, "acc_stderr,none": 0.00995759266053865}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201524}}
{"created_at": "2025-08-27T10:49:04.595382", "global_step": 34000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2909556313993174, "acc_stderr,none": 0.013273077865907581, "acc_norm,none": 0.3046075085324232, "acc_norm_stderr,none": 0.013449522109932487}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6005892255892256, "acc_stderr,none": 0.010050018228742122, "acc_norm,none": 0.5412457912457912, "acc_norm_stderr,none": 0.010224815730255816}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632404, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632404}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170805, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170805}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341313, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341313}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.01431110796368305, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.01431110796368305}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3602867954590719, "acc_stderr,none": 0.004791024004588006, "acc_norm,none": 0.44234216291575384, "acc_norm_stderr,none": 0.004956494059864892}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.01813410946643339, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.04819560289115228}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.14084507042253522, "acc_stderr,none": 0.04157742116654289}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05083285677753485}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.0625, "acc_stderr,none": 0.0625}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018879716937192717, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.08140424227436863}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.24516129032258063, "acc_stderr,none": 0.03466511701965931}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.22262773722627738, "acc_stderr,none": 0.0178080517681108, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663814}, "piqa": {"alias": "piqa", "acc,none": 0.676822633297062, "acc_stderr,none": 0.01091197412428213, "acc_norm,none": 0.675734494015234, "acc_norm_stderr,none": 0.010921539041347975}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5020080321285141, "acc_stderr,none": 0.010021992045038411}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.41686746987951806, "acc_stderr,none": 0.009882576606533239}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3421686746987952, "acc_stderr,none": 0.009509659143015632}}
{"created_at": "2025-08-27T12:05:36.212200", "global_step": 36000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.27559726962457337, "acc_stderr,none": 0.013057169655761838, "acc_norm,none": 0.28924914675767915, "acc_norm_stderr,none": 0.013250012579393443}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6077441077441077, "acc_stderr,none": 0.010018744689650043, "acc_norm,none": 0.5382996632996633, "acc_norm_stderr,none": 0.010229639820610516}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254283, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254283}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.01386569562657939, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.01386569562657939}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217898, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217898}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27, "acc_stderr,none": 0.014806876915962117, "acc_norm,none": 0.27, "acc_norm_stderr,none": 0.014806876915962117}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380068, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380068}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.35600477992431784, "acc_stderr,none": 0.004778380758851134, "acc_norm,none": 0.44323839872535353, "acc_norm_stderr,none": 0.004957524197900428}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.018206845040527324, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295699}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.1267605633802817, "acc_stderr,none": 0.03976580062454874}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_italian": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.018889246401805036, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.32335329341317365, "acc_stderr,none": 0.036304928558844665}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2838709677419355, "acc_stderr,none": 0.036332540727054406}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19161676646706588, "acc_stderr,none": 0.030547196475366606}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018101684397584823, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233137}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663925}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256418}, "piqa": {"alias": "piqa", "acc,none": 0.6746463547334058, "acc_stderr,none": 0.010931036623525193, "acc_norm,none": 0.6844396082698585, "acc_norm_stderr,none": 0.010843119201758938}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5020080321285141, "acc_stderr,none": 0.010021992045038411}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4248995983935743, "acc_stderr,none": 0.009908377568198196}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3449799196787149, "acc_stderr,none": 0.009528219800053311}}
{"created_at": "2025-08-27T15:43:21.969757", "global_step": 38000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.27474402730375425, "acc_stderr,none": 0.013044617212771227, "acc_norm,none": 0.30119453924914674, "acc_norm_stderr,none": 0.013406741767847626}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.6039562289562289, "acc_stderr,none": 0.010035580962097942, "acc_norm,none": 0.5340909090909091, "acc_norm_stderr,none": 0.010235908103438688}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581972, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581972}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642538, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642538}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.27, "acc_stderr,none": 0.014806876915962126, "acc_norm,none": 0.27, "acc_norm_stderr,none": 0.014806876915962126}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.014938408363642795, "acc_norm,none": 0.2777777777777778, "acc_norm_stderr,none": 0.014938408363642795}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.014709405413413154, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.014709405413413154}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.3593905596494722, "acc_stderr,none": 0.004788412062375701, "acc_norm,none": 0.4534953196574388, "acc_norm_stderr,none": 0.004968151878211055}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018429798135627007, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.042756781109738705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.4375, "acc_stderr,none": 0.128086884574495}, "include_base_44_italian": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.01837719975074986, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.11428571428571428, "acc_stderr,none": 0.05456364060755606}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.32934131736526945, "acc_stderr,none": 0.036477061123411154}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.17964071856287425, "acc_stderr,none": 0.029795480654829835}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_turkish": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.018109921055541597, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946896}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "piqa": {"alias": "piqa", "acc,none": 0.6746463547334058, "acc_stderr,none": 0.010931036623525197, "acc_norm,none": 0.6719260065288357, "acc_norm_stderr,none": 0.01095448713512422}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.4983935742971888, "acc_stderr,none": 0.010022021141102108}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.42329317269076305, "acc_stderr,none": 0.009903432138272914}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3461847389558233, "acc_stderr,none": 0.009536061379898339}}