diff --git "a/gpt2/metrics.eval.jsonl" "b/gpt2/metrics.eval.jsonl" new file mode 100644--- /dev/null +++ "b/gpt2/metrics.eval.jsonl" @@ -0,0 +1,39 @@ +{"created_at": "2025-06-25T14:55:48.873061", "global_step": 500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.1621160409556314, "acc_stderr,none": 0.010770267380798377, "acc_norm,none": 0.22610921501706485, "acc_norm_stderr,none": 0.012224202097063278}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.3345959595959596, "acc_stderr,none": 0.00968213772432791, "acc_norm,none": 0.31607744107744107, "acc_norm_stderr,none": 0.009540440071928292}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632418, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632418}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.01396359834903046, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.01396359834903046}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.0140117051588845, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.0140117051588845}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687947, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687947}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.01474875065147089, "acc_norm,none": 0.26666666666666666, "acc_norm_stderr,none": 0.01474875065147089}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218174, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218174}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391953, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391953}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.01486394440941746, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.01486394440941746}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2621987651862179, "acc_stderr,none": 0.004389312748012148, "acc_norm,none": 0.26010754829715194, "acc_norm_stderr,none": 0.004377965074211625}, "include_base_44_arabic": {"acc,none": 0.22644927536231885, "acc_stderr,none": 0.017804456121442113, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.04176466758604901}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.03612100523258919}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345139}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.018419237511494123, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2315035799522673, "acc_stderr,none": 0.02065182489356673, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.2556390977443609, "acc_stderr,none": 0.026796771802814458}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.1702127659574468, "acc_stderr,none": 0.055411578656325386}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_german": {"acc,none": 0.2446043165467626, "acc_stderr,none": 0.03528696432939441, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.5217391304347826, "acc_stderr,none": 0.10649955403405122}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.1978021978021978, "acc_stderr,none": 0.04198895203196222}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.24314442413162707, "acc_stderr,none": 0.018329899631546326, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.323943661971831, "acc_stderr,none": 0.055934166129236414}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.03823595564509367}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2281021897810219, "acc_stderr,none": 0.017975769482451883, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.23870967741935484, "acc_stderr,none": 0.03435182440245767}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851862}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2275449101796407, "acc_stderr,none": 0.0187695533694293, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.04199605255658083}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916714}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.208955223880597, "acc_stderr,none": 0.028748298931728655}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896646797984599, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.244, "acc_stderr,none": 0.027217995464553175}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.224, "acc_stderr,none": 0.02642136168734791}, "include_base_44_russian": {"acc,none": 0.25905797101449274, "acc_stderr,none": 0.018573145294929972, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057421}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.0474200647405742}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.05651408783764654}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.04243864702455469}, "include_base_44_spanish": {"acc,none": 0.2690909090909091, "acc_stderr,none": 0.0189026283174948, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.296, "acc_stderr,none": 0.028928939388379638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018222516659056984, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.44, "acc_stderr,none": 0.10132456102380442}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.019213037156152486, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.06767268161329719}, "piqa": {"alias": "piqa", "acc,none": 0.5473340587595212, "acc_stderr,none": 0.01161343165087287, "acc_norm,none": 0.5348204570184983, "acc_norm_stderr,none": 0.011637500993815848}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3301204819277108, "acc_stderr,none": 0.00942588499243073}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177135}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136405}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.00939042730309305}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3485943775100402, "acc_stderr,none": 0.009551542053301814}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177136}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3244979919678715, "acc_stderr,none": 0.009384407992489761}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337357}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3281124497991968, "acc_stderr,none": 0.009411247685593385}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358239}} +{"created_at": "2025-06-25T15:22:16.065621", "global_step": 1000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.17491467576791808, "acc_stderr,none": 0.011101562501828215, "acc_norm,none": 0.21843003412969283, "acc_norm_stderr,none": 0.012074291605700992}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.3602693602693603, "acc_stderr,none": 0.009851002584732383, "acc_norm,none": 0.3484848484848485, "acc_norm_stderr,none": 0.009777377947106527}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.01384086369985952, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.01384086369985952}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198737, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198737}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.013714527832369683, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.013714527832369683}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.013740087830700168, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.013740087830700168}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.0140117051588845, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.0140117051588845}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687937, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687937}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.01384086369985954, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.01384086369985954}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.21777777777777776, "acc_stderr,none": 0.013765500608039457, "acc_norm,none": 0.21777777777777776, "acc_norm_stderr,none": 0.013765500608039457}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030472, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030472}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.21555555555555556, "acc_stderr,none": 0.01371452783236968, "acc_norm,none": 0.21555555555555556, "acc_norm_stderr,none": 0.01371452783236968}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.01396359834903046, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.01396359834903046}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.01386569562657936, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.01386569562657936}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596803, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596803}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.01412955496811077, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.01412955496811077}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26618203545110536, "acc_stderr,none": 0.00441057343183763, "acc_norm,none": 0.26558454491137223, "acc_norm_stderr,none": 0.004407413723383406}, "include_base_44_arabic": {"acc,none": 0.34057971014492755, "acc_stderr,none": 0.020163616385340918, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.046544656229774474}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.32926829268292684, "acc_stderr,none": 0.05221639259619971}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.4, "acc_stderr,none": 0.04803844614152614}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.04622501635210239}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.42, "acc_stderr,none": 0.07050835816716035}, "include_base_44_bengali": {"acc,none": 0.21532846715328466, "acc_stderr,none": 0.017604037167642254, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073529}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857155}, "include_base_44_chinese": {"acc,none": 0.28440366972477066, "acc_stderr,none": 0.019350001577075553, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383255}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.5, "acc_stderr,none": 0.12909944487358055}, "include_base_44_french": {"acc,none": 0.23389021479713604, "acc_stderr,none": 0.020728459122602638, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.24060150375939848, "acc_stderr,none": 0.02625796699570633}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.06034260964773521}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.0625, "acc_stderr,none": 0.0625}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.05112872883765946}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_german": {"acc,none": 0.26618705035971224, "acc_stderr,none": 0.037705717272049626, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.17391304347826086, "acc_stderr,none": 0.08081046758996394}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778438}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390467}, "include_base_44_hindi": {"acc,none": 0.21755027422303475, "acc_stderr,none": 0.01763529539981861, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.21875, "acc_stderr,none": 0.04241380578364351}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.04324785766640779}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.12, "acc_stderr,none": 0.06633249580710801}, "include_base_44_italian": {"acc,none": 0.2116788321167883, "acc_stderr,none": 0.017455834875732025, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.18562874251497005, "acc_stderr,none": 0.030177258698794687}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333333}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1870967741935484, "acc_stderr,none": 0.031426224808489854}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.033937086485697064}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.2155688622754491, "acc_stderr,none": 0.018417105142205632, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.1919191919191919, "acc_stderr,none": 0.039780804479336844}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.21393034825870647, "acc_stderr,none": 0.028996909693328913}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.22885572139303484, "acc_stderr,none": 0.02970528405677244}, "include_base_44_korean": {"acc,none": 0.236, "acc_stderr,none": 0.019027775090629962, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953852}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953835}, "include_base_44_russian": {"acc,none": 0.2536231884057971, "acc_stderr,none": 0.018358055842720357, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.04742006474057422}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.17391304347826086, "acc_stderr,none": 0.045964707814863}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.5357142857142857, "acc_stderr,none": 0.09597925890831603}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.3058823529411765, "acc_stderr,none": 0.05027523520585574}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.052254366311072324}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2553191489361702, "acc_stderr,none": 0.04521528920247246}, "include_base_44_spanish": {"acc,none": 0.28909090909090907, "acc_stderr,none": 0.019362466136964455, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.26, "acc_stderr,none": 0.027797315752644304}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132711}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.308, "acc_stderr,none": 0.029256928606501864}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390467}, "include_base_44_telugu": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018824163385020864, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2513089005235602, "acc_stderr,none": 0.03146867590755503}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.01933494896597911, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "piqa": {"alias": "piqa", "acc,none": 0.5538628944504896, "acc_stderr,none": 0.011597936590301235, "acc_norm,none": 0.5473340587595212, "acc_norm_stderr,none": 0.011613431650872873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463626}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512706}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3461847389558233, "acc_stderr,none": 0.009536061379898339}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136405}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3289156626506024, "acc_stderr,none": 0.009417125981806732}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136397}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778593}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.00948799273220152}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177135}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3248995983935743, "acc_stderr,none": 0.009387421581685765}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101952}} +{"created_at": "2025-06-25T15:49:29.705970", "global_step": 1500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.181740614334471, "acc_stderr,none": 0.011269198948880236, "acc_norm,none": 0.2354948805460751, "acc_norm_stderr,none": 0.012399451855004748}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2849326599326599, "acc_stderr,none": 0.009262170695590662, "acc_norm,none": 0.2908249158249158, "acc_norm_stderr,none": 0.009318815921176657}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030463, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030463}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578921, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578921}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.0138903842971987, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.0138903842971987}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276205, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276205}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596777, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596777}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755677, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755677}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368797, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368797}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2577175861382195, "acc_stderr,none": 0.00436483800033562, "acc_norm,none": 0.2517426807408883, "acc_norm_stderr,none": 0.0043312717177738606}, "include_base_44_arabic": {"acc,none": 0.22826086956521738, "acc_stderr,none": 0.017821516362696545, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3047619047619048, "acc_stderr,none": 0.0451367671816831}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.2791970802919708, "acc_stderr,none": 0.019188515511982175, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.03629335329947859}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.01837197548559227, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french": {"acc,none": 0.24343675417661098, "acc_stderr,none": 0.020879902766189416, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21052631578947367, "acc_stderr,none": 0.025043734285836165}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.07167347772513391}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.2446043165467626, "acc_stderr,none": 0.03633502668584038, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.13043478260869565, "acc_stderr,none": 0.07180198468215394}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2967032967032967, "acc_stderr,none": 0.04815143362682777}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.24314442413162707, "acc_stderr,none": 0.018404472188155673, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.19791666666666666, "acc_stderr,none": 0.040877951813226225}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.0487901635934895}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018704896052409277, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.035125586477990835}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3032258064516129, "acc_stderr,none": 0.03703980981843178}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.26147704590818366, "acc_stderr,none": 0.019674770710381876, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916714}, "include_base_44_korean": {"acc,none": 0.232, "acc_stderr,none": 0.01891175873560934, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.224, "acc_stderr,none": 0.02642136168734791}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.0187046497046539, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.048768771474726615}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.0541699276519132}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.18085106382978725, "acc_stderr,none": 0.03991172470919534}, "include_base_44_spanish": {"acc,none": 0.2690909090909091, "acc_stderr,none": 0.018875479233219906, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.224, "acc_stderr,none": 0.026421361687347905}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.296, "acc_stderr,none": 0.028928939388379638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2281021897810219, "acc_stderr,none": 0.017908632491873543, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24607329842931938, "acc_stderr,none": 0.031247840365749128}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.18072289156626506, "acc_stderr,none": 0.029955737855810138}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_turkish": {"acc,none": 0.28284671532846717, "acc_stderr,none": 0.019231686318235016, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071857}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237651}, "piqa": {"alias": "piqa", "acc,none": 0.5331882480957563, "acc_stderr,none": 0.011640096923563131, "acc_norm,none": 0.515233949945593, "acc_norm_stderr,none": 0.011660408257153638}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3265060240963855, "acc_stderr,none": 0.00939939733596211}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.00941419073413175}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3293172690763052, "acc_stderr,none": 0.009420053435910418}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.00943457405610196}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3257028112449799, "acc_stderr,none": 0.009393425164263995}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463633}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512706}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3281124497991968, "acc_stderr,none": 0.009411247685593387}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3473895582329317, "acc_stderr,none": 0.0095438354093349}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358237}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}} +{"created_at": "2025-06-25T16:20:40.020404", "global_step": 2000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2090443686006826, "acc_stderr,none": 0.011882746987406451, "acc_norm,none": 0.2593856655290102, "acc_norm_stderr,none": 0.012808273573927092}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26052188552188554, "acc_stderr,none": 0.00900643589033659, "acc_norm,none": 0.26725589225589225, "acc_norm_stderr,none": 0.00908046324601747}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.01379076697825695, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.01379076697825695}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308193, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308193}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391954, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391954}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.01405925666321818, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.01405925666321818}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218167, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218167}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342971, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342971}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218188, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218188}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458125, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458125}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25791674965146383, "acc_stderr,none": 0.004365938407209613, "acc_norm,none": 0.25582553276239794, "acc_norm_stderr,none": 0.004354325017137536}, "include_base_44_arabic": {"acc,none": 0.2318840579710145, "acc_stderr,none": 0.017989172672953205, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614584}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.041764667586049034}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.039906571509931876}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.01921219573015146, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.01845830580234921, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french": {"acc,none": 0.2744630071599045, "acc_stderr,none": 0.021848029244846145, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.2631578947368421, "acc_stderr,none": 0.027050324671055014}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3617021276595745, "acc_stderr,none": 0.07084485475872632}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.050215421942054}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.2805755395683453, "acc_stderr,none": 0.03738464350280541, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.5217391304347826, "acc_stderr,none": 0.10649955403405122}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.24175824175824176, "acc_stderr,none": 0.04513082148355003}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.2376599634369287, "acc_stderr,none": 0.01826947768945123, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.1875, "acc_stderr,none": 0.040045204719824215}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.0487901635934895}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.018297651634444044, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.03367511880168704}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484004}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.019718303395166017, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2885572139303483, "acc_stderr,none": 0.03203841040213322}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916718}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268484}, "include_base_44_russian": {"acc,none": 0.2717391304347826, "acc_stderr,none": 0.01880668963538939, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191318}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2690909090909091, "acc_stderr,none": 0.01891671635993305, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.264, "acc_stderr,none": 0.027934518957690908}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.252, "acc_stderr,none": 0.02751385193303135}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_telugu": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.01831742278461218, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24607329842931938, "acc_stderr,none": 0.03124784036574912}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.018357569930625466, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946896}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.529379760609358, "acc_stderr,none": 0.011645667565050864, "acc_norm,none": 0.49891186071817195, "acc_norm_stderr,none": 0.011665796539540878}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.00950428807888022}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3301204819277108, "acc_stderr,none": 0.009425884992430706}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3473895582329317, "acc_stderr,none": 0.0095438354093349}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757717}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.00947142305417713}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136395}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516883}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3273092369477912, "acc_stderr,none": 0.009405338156614929}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3381526104417671, "acc_stderr,none": 0.009482500057981024}} +{"created_at": "2025-06-25T16:43:16.599707", "global_step": 2500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19795221843003413, "acc_stderr,none": 0.011643990971573401, "acc_norm,none": 0.2431740614334471, "acc_norm_stderr,none": 0.012536554144587089}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26725589225589225, "acc_stderr,none": 0.00908046324601747, "acc_norm,none": 0.2815656565656566, "acc_norm_stderr,none": 0.009228934764519288}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276195, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276195}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687956, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687956}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198709, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198709}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859556, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859556}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198733, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198733}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030491, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030491}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699814, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699814}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218202, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218202}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26010754829715194, "acc_stderr,none": 0.004377965074211625, "acc_norm,none": 0.252141007767377, "acc_norm_stderr,none": 0.004333543083293471}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01765188573272388, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.041764667586049006}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.040582599273365695}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.041764667586049034}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.24105011933174225, "acc_stderr,none": 0.020839188490481865, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.21804511278195488, "acc_stderr,none": 0.025365363516347498}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.07167347772513392}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_german": {"acc,none": 0.2805755395683453, "acc_stderr,none": 0.0357917459282541, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.1978021978021978, "acc_stderr,none": 0.04198895203196222}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_hindi": {"acc,none": 0.23583180987202926, "acc_stderr,none": 0.018189400815726023, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.25, "acc_stderr,none": 0.018506484935758665, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.03419073042180668}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.236, "acc_stderr,none": 0.019024397778730717, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.244, "acc_stderr,none": 0.027217995464553175}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.25905797101449274, "acc_stderr,none": 0.018423108953793465, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.047420064740574204}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.15942028985507245, "acc_stderr,none": 0.0443922126579713}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.17857142857142858, "acc_stderr,none": 0.07370704611510596}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.053174090822038264}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191318}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2545454545454545, "acc_stderr,none": 0.018557360382759833, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.22, "acc_stderr,none": 0.02625179282460584}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.276, "acc_stderr,none": 0.028328537274211345}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018259367400023804, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233137}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019405491825144058, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071857}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5369967355821545, "acc_stderr,none": 0.011633844942776446, "acc_norm,none": 0.5125136017410229, "acc_norm_stderr,none": 0.011662170084916892}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337356}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3485943775100402, "acc_stderr,none": 0.009551542053301821}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3273092369477912, "acc_stderr,none": 0.009405338156614927}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3409638554216867, "acc_stderr,none": 0.009501591178361541}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778577}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3244979919678715, "acc_stderr,none": 0.009384407992489767}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3176706827309237, "acc_stderr,none": 0.009331965741359506}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.009443193365903345}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3248995983935743, "acc_stderr,none": 0.00938742158168577}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201526}} +{"created_at": "2025-06-25T17:10:13.159800", "global_step": 3000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19368600682593856, "acc_stderr,none": 0.01154842540997854, "acc_norm,none": 0.2525597269624573, "acc_norm_stderr,none": 0.01269672898020771}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26430976430976433, "acc_stderr,none": 0.009048410451863014, "acc_norm,none": 0.2601010101010101, "acc_norm_stderr,none": 0.009001718541079954}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132658, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132658}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683079, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683079}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391958, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391958}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579393, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579393}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055187, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055187}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368795, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368795}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888453, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888453}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218191, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218191}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2577175861382195, "acc_stderr,none": 0.004364838000335621, "acc_norm,none": 0.25960963951404104, "acc_norm_stderr,none": 0.004375244237045136}, "include_base_44_arabic": {"acc,none": 0.213768115942029, "acc_stderr,none": 0.017452313583081646, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993187}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.043362909039199406}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1523809523809524, "acc_stderr,none": 0.03524106511591412}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018154966660257715, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078942}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589322}, "include_base_44_chinese": {"acc,none": 0.23853211009174313, "acc_stderr,none": 0.01835118006583838, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2577565632458234, "acc_stderr,none": 0.021313559542183193, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.2702702702702703, "acc_stderr,none": 0.05197789984508372}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.31654676258992803, "acc_stderr,none": 0.03828515089823217, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6086956521739131, "acc_stderr,none": 0.10405096111532161}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497397}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.2376599634369287, "acc_stderr,none": 0.0182549406677916, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018741300105007158, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.26746506986027946, "acc_stderr,none": 0.019825695930865774, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.26262626262626265, "acc_stderr,none": 0.04445287676983945}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296018}, "include_base_44_korean": {"acc,none": 0.228, "acc_stderr,none": 0.018799299311901857, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.232, "acc_stderr,none": 0.026750070374865164}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.224, "acc_stderr,none": 0.02642136168734791}, "include_base_44_russian": {"acc,none": 0.27717391304347827, "acc_stderr,none": 0.01905575299812984, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.053249777017025594}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.25, "acc_stderr,none": 0.08333333333333333}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522358}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472661}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.24468085106382978, "acc_stderr,none": 0.04457832688439453}, "include_base_44_spanish": {"acc,none": 0.27090909090909093, "acc_stderr,none": 0.018929034796004753, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.28, "acc_stderr,none": 0.028454148277832315}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.44, "acc_stderr,none": 0.10132456102380442}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.01826605268110087, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24607329842931938, "acc_stderr,none": 0.031247840365749117}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.2737226277372263, "acc_stderr,none": 0.0189006492330004, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.03696584317010601}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.18674698795180722, "acc_stderr,none": 0.0303387491445006}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5179542981501633, "acc_stderr,none": 0.011658300623287153, "acc_norm,none": 0.5097932535364527, "acc_norm_stderr,none": 0.011663586263283223}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.009344511873557415}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463309}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3200803212851406, "acc_stderr,none": 0.009350737047480016}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3236947791164659, "acc_stderr,none": 0.009378357180373073}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996467}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}} +{"created_at": "2025-06-25T17:37:28.480456", "global_step": 3500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19539249146757678, "acc_stderr,none": 0.011586907189952911, "acc_norm,none": 0.24744027303754265, "acc_norm_stderr,none": 0.01261035266329267}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.27146464646464646, "acc_stderr,none": 0.009125362970360623, "acc_norm,none": 0.2601010101010101, "acc_norm_stderr,none": 0.009001718541079954}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596803, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596803}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859516, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859516}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687938, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687938}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.21222222222222223, "acc_stderr,none": 0.013636956209422683, "acc_norm,none": 0.21222222222222223, "acc_norm_stderr,none": 0.013636956209422683}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859547, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859547}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596793, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596793}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.21444444444444444, "acc_stderr,none": 0.013688819788367371, "acc_norm,none": 0.21444444444444444, "acc_norm_stderr,none": 0.013688819788367371}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256947, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256947}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.21333333333333335, "acc_stderr,none": 0.01366296286301118, "acc_norm,none": 0.21333333333333335, "acc_norm_stderr,none": 0.01366296286301118}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458148, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458148}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.21777777777777776, "acc_stderr,none": 0.013765500608039476, "acc_norm,none": 0.21777777777777776, "acc_norm_stderr,none": 0.013765500608039476}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2586138219478192, "acc_stderr,none": 0.004369780529824013, "acc_norm,none": 0.2590121489743079, "acc_norm_stderr,none": 0.004371969542814558}, "include_base_44_arabic": {"acc,none": 0.2608695652173913, "acc_stderr,none": 0.018521246502787803, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614585}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.3619047619047619, "acc_stderr,none": 0.0471219474848361}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_bengali": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018568411805027996, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.031755547866299194}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.0591780433634514}, "include_base_44_chinese": {"acc,none": 0.26238532110091745, "acc_stderr,none": 0.018836134612054158, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.36619718309859156, "acc_stderr,none": 0.05758184314388002}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.25059665871121717, "acc_stderr,none": 0.021205924386894965, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3191489361702128, "acc_stderr,none": 0.0687296045180637}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.2702702702702703, "acc_stderr,none": 0.05197789984508372}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.2446043165467626, "acc_stderr,none": 0.03611161281088708, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.43478260869565216, "acc_stderr,none": 0.10568965974008647}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.1978021978021978, "acc_stderr,none": 0.04198895203196222}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.21937842778793418, "acc_stderr,none": 0.01769066232332176, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489505}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.03823595564509367}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909659}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.018096181723363546, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.03393708648569707}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.1486470975026408}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.21935483870967742, "acc_stderr,none": 0.033345718921890366}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.22954091816367264, "acc_stderr,none": 0.018831231470519034, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.21212121212121213, "acc_stderr,none": 0.04129606932540889}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.21890547263681592, "acc_stderr,none": 0.029239174636647}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916714}, "include_base_44_korean": {"acc,none": 0.236, "acc_stderr,none": 0.01901426224376586, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031342}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.22, "acc_stderr,none": 0.026251792824605845}, "include_base_44_russian": {"acc,none": 0.24094202898550723, "acc_stderr,none": 0.018078775818847422, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.15942028985507245, "acc_stderr,none": 0.04439221265797127}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.047420064740574204}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.4642857142857143, "acc_stderr,none": 0.09597925890831603}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.2, "acc_stderr,none": 0.04364357804719844}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2553191489361702, "acc_stderr,none": 0.045215289202472475}, "include_base_44_spanish": {"acc,none": 0.27454545454545454, "acc_stderr,none": 0.019052829790094775, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.256, "acc_stderr,none": 0.02765710871820491}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.276, "acc_stderr,none": 0.02832853727421135}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.01835879457035302, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2617801047120419, "acc_stderr,none": 0.03189219352501193}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018435274068418498, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.12, "acc_stderr,none": 0.04642307659791979}, "piqa": {"alias": "piqa", "acc,none": 0.5233949945593036, "acc_stderr,none": 0.011653047155927788, "acc_norm,none": 0.5048966267682263, "acc_norm_stderr,none": 0.011665264730078145}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3261044176706827, "acc_stderr,none": 0.00939641517272266}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289815}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3220883534136546, "acc_stderr,none": 0.00936616080707205}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3164658634538153, "acc_stderr,none": 0.009322472076957607}} +{"created_at": "2025-06-25T18:02:12.023617", "global_step": 4000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20819112627986347, "acc_stderr,none": 0.01186486611844807, "acc_norm,none": 0.25341296928327645, "acc_norm_stderr,none": 0.012710896778378606}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2668350168350168, "acc_stderr,none": 0.009075915859267253, "acc_norm,none": 0.2680976430976431, "acc_norm_stderr,none": 0.0090895265782137}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687963, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687963}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.01431110796368305, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.01431110796368305}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241913, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241913}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241901, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241901}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2688888888888889, "acc_stderr,none": 0.014787619747567617, "acc_norm,none": 0.2688888888888889, "acc_norm_stderr,none": 0.014787619747567617}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276202, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276202}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687985, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687985}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683048, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683048}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884495, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884495}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168464, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168464}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651672, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651672}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380037, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380037}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.014709405413413146, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.014709405413413146}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2591117307309301, "acc_stderr,none": 0.004372516060164746, "acc_norm,none": 0.26180043815972914, "acc_norm_stderr,none": 0.004387161203087957}, "include_base_44_arabic": {"acc,none": 0.2391304347826087, "acc_stderr,none": 0.018159092551144923, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.24390243902439024, "acc_stderr,none": 0.04771495084460432}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.037750269583862385}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993186}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.061011875725893214}, "include_base_44_bengali": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018793753408229065, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073532}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.0610118757258932}, "include_base_44_chinese": {"acc,none": 0.23119266055045873, "acc_stderr,none": 0.01801605352849781, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.1839080459770115, "acc_stderr,none": 0.041775406780189865}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.04275678110973871}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021123752255008624, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.23684210526315788, "acc_stderr,none": 0.02611642456080608}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.425531914893617, "acc_stderr,none": 0.07289875413448858}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.20270270270270271, "acc_stderr,none": 0.04705204303422786}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_german": {"acc,none": 0.28776978417266186, "acc_stderr,none": 0.03871051629560743, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497397}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_hindi": {"acc,none": 0.25045703839122485, "acc_stderr,none": 0.018590769145105206, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.21875, "acc_stderr,none": 0.04241380578364351}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018990364880734134, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.036812906368192656}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157496}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.2554890219560878, "acc_stderr,none": 0.019543343509575418, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.25252525252525254, "acc_stderr,none": 0.043887175543277195}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.03076944496729602}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.25870646766169153, "acc_stderr,none": 0.030965903123573023}, "include_base_44_korean": {"acc,none": 0.266, "acc_stderr,none": 0.019754720853100774, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953835}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.296, "acc_stderr,none": 0.028928939388379635}, "include_base_44_russian": {"acc,none": 0.29891304347826086, "acc_stderr,none": 0.019494151142747253, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06734350297014736}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.29411764705882354, "acc_stderr,none": 0.04971495616050098}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2898550724637681, "acc_stderr,none": 0.05501859437429688}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2765957446808511, "acc_stderr,none": 0.046384384968156625}, "include_base_44_spanish": {"acc,none": 0.24909090909090909, "acc_stderr,none": 0.01838392498811663, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.224, "acc_stderr,none": 0.026421361687347905}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.44, "acc_stderr,none": 0.10132456102380442}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.244, "acc_stderr,none": 0.02721799546455318}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_telugu": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018285043603937484, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.1927710843373494, "acc_stderr,none": 0.030709824050565274}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.27225130890052357, "acc_stderr,none": 0.03229229447785652}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.44, "acc_stderr,none": 0.10132456102380442}, "include_base_44_turkish": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018770126638958325, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.035509201856896294}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237653}, "piqa": {"alias": "piqa", "acc,none": 0.5206746463547334, "acc_stderr,none": 0.011655846995729703, "acc_norm,none": 0.501088139281828, "acc_norm_stderr,none": 0.011665796539540876}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516883}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337347}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177133}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3381526104417671, "acc_stderr,none": 0.00948250005798103}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463305}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512703}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3281124497991968, "acc_stderr,none": 0.009411247685593387}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3413654618473896, "acc_stderr,none": 0.009504288078880218}} +{"created_at": "2025-06-25T18:30:41.085054", "global_step": 4500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.1962457337883959, "acc_stderr,none": 0.011606019881416288, "acc_norm,none": 0.2551194539249147, "acc_norm_stderr,none": 0.012739038695202098}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.25547138047138046, "acc_stderr,none": 0.008949113551665557, "acc_norm,none": 0.257996632996633, "acc_norm_stderr,none": 0.008977970005203404}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391954, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391954}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.01374008783070017, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.01374008783070017}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217886, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217886}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581962, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581962}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2811111111111111, "acc_stderr,none": 0.014993051943690185, "acc_norm,none": 0.2811111111111111, "acc_norm_stderr,none": 0.014993051943690185}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859532, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859532}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214544, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214544}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2111111111111111, "acc_stderr,none": 0.013610798969328477, "acc_norm,none": 0.2111111111111111, "acc_norm_stderr,none": 0.013610798969328477}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458158, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458158}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.21888888888888888, "acc_stderr,none": 0.013790766978256938, "acc_norm,none": 0.21888888888888888, "acc_norm_stderr,none": 0.013790766978256938}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241896, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241896}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.27666666666666667, "acc_stderr,none": 0.014919965280947366, "acc_norm,none": 0.27666666666666667, "acc_norm_stderr,none": 0.014919965280947366}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.21222222222222223, "acc_stderr,none": 0.013636956209422683, "acc_norm,none": 0.21222222222222223, "acc_norm_stderr,none": 0.013636956209422683}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22, "acc_stderr,none": 0.013815887744596808, "acc_norm,none": 0.22, "acc_norm_stderr,none": 0.013815887744596808}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683089, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683089}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25871340370444135, "acc_stderr,none": 0.0043703282248317874, "acc_norm,none": 0.262796255725951, "acc_norm_stderr,none": 0.004392531344297404}, "include_base_44_arabic": {"acc,none": 0.2608695652173913, "acc_stderr,none": 0.018724873643444495, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.21904761904761905, "acc_stderr,none": 0.040556911537178254}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.24390243902439024, "acc_stderr,none": 0.04771495084460435}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.04336290903919941}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614586}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.01818892762520953, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594687}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "include_base_44_chinese": {"acc,none": 0.25321100917431194, "acc_stderr,none": 0.018680033480683703, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.0487901635934895}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.0427567811097387}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383253}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_french": {"acc,none": 0.2577565632458234, "acc_stderr,none": 0.02130267335661609, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.2556390977443609, "acc_stderr,none": 0.026796771802814458}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.0716734777251339}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.05021542194205402}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.0625, "acc_stderr,none": 0.0625}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.03945733595129815, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.08793911249520549}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.31868131868131866, "acc_stderr,none": 0.04911704114831278}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_hindi": {"acc,none": 0.24862888482632542, "acc_stderr,none": 0.01846506414144623, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.3125, "acc_stderr,none": 0.047555369390792634}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956954}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.0462251473492143}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132711}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.019006497635187865, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.07961491954505552}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31137724550898205, "acc_stderr,none": 0.03594016584565771}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387369}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484003}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.0334046315394559}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.21756487025948104, "acc_stderr,none": 0.018485877000780394, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.04199605255658083}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.22388059701492538, "acc_stderr,none": 0.029475250236017197}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.208955223880597, "acc_stderr,none": 0.028748298931728655}, "include_base_44_korean": {"acc,none": 0.268, "acc_stderr,none": 0.01979574413715676, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.02690933759495387}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.3, "acc_stderr,none": 0.029040893477575845}, "include_base_44_russian": {"acc,none": 0.302536231884058, "acc_stderr,none": 0.019593026573840003, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522357}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522358}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.25882352941176473, "acc_stderr,none": 0.047788461203740945}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750292}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.24468085106382978, "acc_stderr,none": 0.04457832688439451}, "include_base_44_spanish": {"acc,none": 0.26545454545454544, "acc_stderr,none": 0.01886736218917087, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.248, "acc_stderr,none": 0.027367497504863544}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.276, "acc_stderr,none": 0.028328537274211342}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_telugu": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.01888856976846673, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2513089005235602, "acc_stderr,none": 0.03146867590755503}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132711}, "include_base_44_turkish": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.01859355564364867, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.0676726816132972}, "piqa": {"alias": "piqa", "acc,none": 0.5375408052230686, "acc_stderr,none": 0.011632896120570523, "acc_norm,none": 0.5, "acc_norm_stderr,none": 0.011665824165343952}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646753}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289827}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.321285140562249, "acc_stderr,none": 0.009360015122426901}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778586}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778607}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3220883534136546, "acc_stderr,none": 0.009366160807072059}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757706}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463626}} +{"created_at": "2025-06-25T18:53:48.950238", "global_step": 5000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20051194539249148, "acc_stderr,none": 0.011700318050499368, "acc_norm,none": 0.25426621160409557, "acc_norm_stderr,none": 0.012724999945157736}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2702020202020202, "acc_stderr,none": 0.00911200222911985, "acc_norm,none": 0.26304713804713803, "acc_norm_stderr,none": 0.009034514898865822}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458115, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458115}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2592113124875523, "acc_stderr,none": 0.004373062283376512, "acc_norm,none": 0.2597092212706632, "acc_norm_stderr,none": 0.004375788991216851}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.2413162705667276, "acc_stderr,none": 0.018360767807872203, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.1875, "acc_stderr,none": 0.040045204719824215}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5266594124047879, "acc_stderr,none": 0.011649229994347386, "acc_norm,none": 0.5048966267682263, "acc_norm_stderr,none": 0.011665264730078142}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3248995983935743, "acc_stderr,none": 0.00938742158168576}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3437751004016064, "acc_stderr,none": 0.009520310502882944}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463304}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3465863453815261, "acc_stderr,none": 0.009538660220458996}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358228}} +{"created_at": "2025-06-25T19:20:07.042213", "global_step": 5500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2030716723549488, "acc_stderr,none": 0.011755899303705582, "acc_norm,none": 0.25170648464163825, "acc_norm_stderr,none": 0.012682496334042961}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26304713804713803, "acc_stderr,none": 0.009034514898865824, "acc_norm,none": 0.26346801346801346, "acc_norm_stderr,none": 0.009039157374497717}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25941047600079664, "acc_stderr,none": 0.004374153847826759, "acc_norm,none": 0.2610037841067516, "acc_norm_stderr,none": 0.004382844128643421}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5184983677910773, "acc_stderr,none": 0.011657837583818156, "acc_norm,none": 0.5016322089227421, "acc_norm_stderr,none": 0.011665762007194873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3465863453815261, "acc_stderr,none": 0.009538660220458996}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3441767068273092, "acc_stderr,none": 0.009522954469806033}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870672}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240636}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358223}} +{"created_at": "2025-06-25T19:45:39.472510", "global_step": 6000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20477815699658702, "acc_stderr,none": 0.011792544338513395, "acc_norm,none": 0.257679180887372, "acc_norm_stderr,none": 0.012780770562768412}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26262626262626265, "acc_stderr,none": 0.009029861776763754, "acc_norm,none": 0.2638888888888889, "acc_norm_stderr,none": 0.009043789220055129}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218193, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218193}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945584, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945584}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26110336586337385, "acc_stderr,none": 0.004383384784038473, "acc_norm,none": 0.26030671181039633, "acc_norm_stderr,none": 0.0043790513570241395}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2553699284009546, "acc_stderr,none": 0.021239711488784258, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.2702702702702703, "acc_stderr,none": 0.05197789984508372}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018110101339169696, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.23036649214659685, "acc_stderr,none": 0.030547441226520547}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.530467899891186, "acc_stderr,none": 0.011644145418354871, "acc_norm,none": 0.49020674646354734, "acc_norm_stderr,none": 0.011663586263283223}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3461847389558233, "acc_stderr,none": 0.009536061379898335}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.00944890091461762}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870672}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.009414190734131758}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3293172690763052, "acc_stderr,none": 0.009420053435910411}} +{"created_at": "2025-06-25T20:13:02.400185", "global_step": 6500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19539249146757678, "acc_stderr,none": 0.01158690718995291, "acc_norm,none": 0.24573378839590443, "acc_norm_stderr,none": 0.012581033453730111}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2596801346801347, "acc_stderr,none": 0.008996990428562217, "acc_norm,none": 0.2668350168350168, "acc_norm_stderr,none": 0.009075915859267264}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26319458275243973, "acc_stderr,none": 0.004394671271021432, "acc_norm,none": 0.26249751045608444, "acc_norm_stderr,none": 0.0043909233532005605}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5228509249183896, "acc_stderr,none": 0.011653634832401179, "acc_norm,none": 0.5043525571273123, "acc_norm_stderr,none": 0.01166538214464239}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289827}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757713}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.009443193365903341}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.336144578313253, "acc_stderr,none": 0.00946863466929353}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757719}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.00946022348499647}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3196787148594378, "acc_stderr,none": 0.009347628444867173}} +{"created_at": "2025-06-25T20:49:03.495743", "global_step": 7000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2022184300341297, "acc_stderr,none": 0.011737454431872105, "acc_norm,none": 0.24914675767918087, "acc_norm_stderr,none": 0.012639407111926433}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26304713804713803, "acc_stderr,none": 0.009034514898865826, "acc_norm,none": 0.2647306397306397, "acc_norm_stderr,none": 0.009053021086173958}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.27, "acc_stderr,none": 0.014806876915962114, "acc_norm,none": 0.27, "acc_norm_stderr,none": 0.014806876915962114}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.01495673688868308, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.01495673688868308}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132628, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132628}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391953, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391953}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792671, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792671}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.01495673688868308, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.01495673688868308}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132653, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132653}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705704, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705704}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683079, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683079}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391954, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391954}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.01428887637569982, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.01428887637569982}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683079, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683079}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683087, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683087}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380068, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380068}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683087, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683087}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25980880302728543, "acc_stderr,none": 0.004376333451909803, "acc_norm,none": 0.26180043815972914, "acc_norm_stderr,none": 0.004387161203087956}, "include_base_44_arabic": {"acc,none": 0.21557971014492755, "acc_stderr,none": 0.017409634677354367, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.24390243902439024, "acc_stderr,none": 0.04771495084460434}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.10476190476190476, "acc_stderr,none": 0.03002992867261481}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993187}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614585}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018313993697436273, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560372}, "include_base_44_chinese": {"acc,none": 0.25871559633027524, "acc_stderr,none": 0.01873169956231196, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.055934166129236414}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_french": {"acc,none": 0.2553699284009546, "acc_stderr,none": 0.021198584539885237, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.425531914893617, "acc_stderr,none": 0.07289875413448858}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.32374100719424465, "acc_stderr,none": 0.039048161152400174, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.5652173913043478, "acc_stderr,none": 0.10568965974008646}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_hindi": {"acc,none": 0.2577696526508227, "acc_stderr,none": 0.0187048159505201, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2708333333333333, "acc_stderr,none": 0.045593471241867974}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.0432478576664078}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115034}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911678}, "include_base_44_italian": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.01891741511236919, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.03612599731403395}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.25349301397205587, "acc_stderr,none": 0.01949226055956483, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.25252525252525254, "acc_stderr,none": 0.043887175543277195}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.25870646766169153, "acc_stderr,none": 0.030965903123573037}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916707}, "include_base_44_korean": {"acc,none": 0.266, "acc_stderr,none": 0.01979046868071624, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031352}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.28, "acc_stderr,none": 0.02845414827783233}, "include_base_44_russian": {"acc,none": 0.26268115942028986, "acc_stderr,none": 0.018714636743237995, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.18840579710144928, "acc_stderr,none": 0.0474200647405742}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.05579903894954331}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.3411764705882353, "acc_stderr,none": 0.05172904297361927}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.22340425531914893, "acc_stderr,none": 0.043191844364621576}, "include_base_44_spanish": {"acc,none": 0.28545454545454546, "acc_stderr,none": 0.019136887056688368, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.236, "acc_stderr,none": 0.02690933759495383}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.44, "acc_stderr,none": 0.10132456102380442}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.3, "acc_stderr,none": 0.029040893477575845}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.48, "acc_stderr,none": 0.10198039027185572}, "include_base_44_telugu": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.01848756597562893, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.25654450261780104, "acc_stderr,none": 0.031683419272745185}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.07483314773547882}, "include_base_44_turkish": {"acc,none": 0.29744525547445255, "acc_stderr,none": 0.019490984022159017, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.03629335329947859}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.42, "acc_stderr,none": 0.07050835816716035}, "piqa": {"alias": "piqa", "acc,none": 0.5195865070729053, "acc_stderr,none": 0.011656869979288468, "acc_norm,none": 0.5043525571273123, "acc_norm_stderr,none": 0.01166538214464239}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996476}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463623}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3409638554216867, "acc_stderr,none": 0.009501591178361543}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.329718875502008, "acc_stderr,none": 0.009422973103706461}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293523}} +{"created_at": "2025-06-25T21:03:57.143312", "global_step": 7500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19880546075085323, "acc_stderr,none": 0.011662850198175537, "acc_norm,none": 0.24829351535836178, "acc_norm_stderr,none": 0.012624912868089765}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26052188552188554, "acc_stderr,none": 0.009006435890336588, "acc_norm,none": 0.2596801346801347, "acc_norm_stderr,none": 0.008996990428562217}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792654, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792654}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2593108942441745, "acc_stderr,none": 0.0043736082125610245, "acc_norm,none": 0.2608046205935073, "acc_norm_stderr,none": 0.004381761941552691}, "include_base_44_arabic": {"acc,none": 0.2210144927536232, "acc_stderr,none": 0.017675403904907037, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.04336290903919941}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5174102285092492, "acc_stderr,none": 0.011658749823107691, "acc_norm,none": 0.5043525571273123, "acc_norm_stderr,none": 0.011665382144642394}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3497991967871486, "acc_stderr,none": 0.009559181474778286}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3196787148594378, "acc_stderr,none": 0.009347628444867173}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463623}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512711}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3437751004016064, "acc_stderr,none": 0.009520310502882934}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3469879518072289, "acc_stderr,none": 0.009541251561568397}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3236947791164659, "acc_stderr,none": 0.009378357180373081}} +{"created_at": "2025-06-25T21:30:35.657588", "global_step": 8000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20392491467576793, "acc_stderr,none": 0.011774262478702254, "acc_norm,none": 0.2508532423208191, "acc_norm_stderr,none": 0.01266819862131543}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2622053872053872, "acc_stderr,none": 0.00902519799172484, "acc_norm,none": 0.26936026936026936, "acc_norm_stderr,none": 0.009103043207756989}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.01470940541341315, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.01470940541341315}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792664, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792664}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276202, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276202}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.01431110796368306, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.01431110796368306}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945587, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945587}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2605058753236407, "acc_stderr,none": 0.004380136468543943, "acc_norm,none": 0.26329416450906196, "acc_norm_stderr,none": 0.004395205528158076}, "include_base_44_arabic": {"acc,none": 0.23369565217391305, "acc_stderr,none": 0.017996917092753018, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.038505120955363834}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2682926829268293, "acc_stderr,none": 0.04923008705849577}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.03850512095536383}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993186}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.061011875725893235}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.01846809435978522, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.26730310262529833, "acc_stderr,none": 0.021475312554819347, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.24060150375939848, "acc_stderr,none": 0.026257966995706334}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.44680851063829785, "acc_stderr,none": 0.07330262843906576}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.2702702702702703, "acc_stderr,none": 0.05197789984508372}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.038405111386177485, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.5652173913043478, "acc_stderr,none": 0.10568965974008646}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.01866348095748335, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.060012003601200396}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31736526946107785, "acc_stderr,none": 0.03612599731403395}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3032258064516129, "acc_stderr,none": 0.03703980981843178}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.03125321962283342}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.08333333333333333, "acc_stderr,none": 0.08333333333333331}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.232, "acc_stderr,none": 0.018914306860576838, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953838}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.26181818181818184, "acc_stderr,none": 0.01872527785203816, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.224, "acc_stderr,none": 0.026421361687347905}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.28, "acc_stderr,none": 0.028454148277832315}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5282916213275299, "acc_stderr,none": 0.01164713417274932, "acc_norm,none": 0.5070729053318824, "acc_norm_stderr,none": 0.011664656918145945}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956478}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287627}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463309}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.00948799273220152}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240632}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3204819277108434, "acc_stderr,none": 0.009353837689339905}} +{"created_at": "2025-06-25T22:42:46.181711", "global_step": 8500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2030716723549488, "acc_stderr,none": 0.011755899303705582, "acc_norm,none": 0.2551194539249147, "acc_norm_stderr,none": 0.012739038695202104}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2638888888888889, "acc_stderr,none": 0.009043789220055144, "acc_norm,none": 0.2668350168350168, "acc_norm_stderr,none": 0.009075915859267267}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030486, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030486}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218174, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218174}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132648, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132648}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.01419863480930818, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.01419863480930818}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030493, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030493}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26070503883688506, "acc_stderr,none": 0.004381220409641169, "acc_norm,none": 0.2667795259908385, "acc_norm_stderr,none": 0.004413722823053155}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.017793847427952127, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2073170731707317, "acc_stderr,none": 0.045042737502966584}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.2761904761904762, "acc_stderr,none": 0.04384295586918882}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.21904761904761905, "acc_stderr,none": 0.040556911537178254}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386237}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24036697247706423, "acc_stderr,none": 0.018368805303140653, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.24821002386634844, "acc_stderr,none": 0.0210402189149351, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22180451127819548, "acc_stderr,none": 0.025521524031416658}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018704174880646497, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.25948103792415167, "acc_stderr,none": 0.0196352041123409, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2736318407960199, "acc_stderr,none": 0.03152439186555401}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.24, "acc_stderr,none": 0.01913805267630342, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.01877345113026232, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.05579903894954331}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.05225436631107233}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2127659574468085, "acc_stderr,none": 0.04243864702455468}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.2937956204379562, "acc_stderr,none": 0.019466916305380735, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.528835690968444, "acc_stderr,none": 0.011646407809944727, "acc_norm,none": 0.5043525571273123, "acc_norm_stderr,none": 0.011665382144642399}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177147}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3257028112449799, "acc_stderr,none": 0.009393425164264}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939169}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337345}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3220883534136546, "acc_stderr,none": 0.009366160807072059}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.336144578313253, "acc_stderr,none": 0.00946863466929353}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101968}} +{"created_at": "2025-06-25T22:49:53.951189", "global_step": 9000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.197098976109215, "acc_stderr,none": 0.011625047669880626, "acc_norm,none": 0.24744027303754265, "acc_norm_stderr,none": 0.01261035266329267}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26346801346801346, "acc_stderr,none": 0.009039157374497717, "acc_norm,none": 0.26304713804713803, "acc_norm_stderr,none": 0.009034514898865822}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687944, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687944}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687968, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687968}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030494, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030494}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368797, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368797}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2583150766779526, "acc_stderr,none": 0.004368135676213557, "acc_norm,none": 0.26269667396932883, "acc_norm_stderr,none": 0.004391995637542121}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01774412964371403, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2761904761904762, "acc_stderr,none": 0.04384295586918882}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428597}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.042324735320550415}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.21904761904761905, "acc_stderr,none": 0.040556911537178254}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.03850512095536381}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.26014319809069214, "acc_stderr,none": 0.021289866063418386, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.44680851063829785, "acc_stderr,none": 0.07330262843906579}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.33093525179856115, "acc_stderr,none": 0.03842353360413949, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01881002094090936, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018860301050379735, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.232, "acc_stderr,none": 0.026750070374865164}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.284, "acc_stderr,none": 0.02857695873043741}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018267063370701327, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073528}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.019158773880514245, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683228}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863768}, "piqa": {"alias": "piqa", "acc,none": 0.5195865070729053, "acc_stderr,none": 0.011656869979288465, "acc_norm,none": 0.501088139281828, "acc_norm_stderr,none": 0.011665796539540876}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646757}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870675}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3317269076305221, "acc_stderr,none": 0.009437454900329122}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778591}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.009390427303093049}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463626}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.00946863466929352}} +{"created_at": "2025-06-25T22:51:36.482810", "global_step": 9500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19539249146757678, "acc_stderr,none": 0.01158690718995291, "acc_norm,none": 0.2525597269624573, "acc_norm_stderr,none": 0.012696728980207706}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26346801346801346, "acc_stderr,none": 0.009039157374497717, "acc_norm,none": 0.26557239057239057, "acc_norm_stderr,none": 0.009062210626971845}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25960963951404104, "acc_stderr,none": 0.004375244237045127, "acc_norm,none": 0.2588129854610635, "acc_norm_stderr,none": 0.004370875625258998}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.018460275165923776, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018261531404880172, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.515778019586507, "acc_stderr,none": 0.011660014400426178, "acc_norm,none": 0.49510337323177367, "acc_norm_stderr,none": 0.011665264730078142}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201522}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3257028112449799, "acc_stderr,none": 0.009393425164263997}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3301204819277108, "acc_stderr,none": 0.009425884992430716}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617607}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667055}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.00939042730309305}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516873}} +{"created_at": "2025-06-25T23:20:01.874173", "global_step": 10000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.18600682593856654, "acc_stderr,none": 0.01137094018326672, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.012653835621466646}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26725589225589225, "acc_stderr,none": 0.00908046324601747, "acc_norm,none": 0.2702020202020202, "acc_norm_stderr,none": 0.009112002229119853}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218174, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218174}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.22444444444444445, "acc_stderr,none": 0.013914930474237287, "acc_norm,none": 0.22444444444444445, "acc_norm_stderr,none": 0.013914930474237287}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030491, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030491}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.01452635475105518, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.01452635475105518}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218167, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218167}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579385, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579385}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2644444444444444, "acc_stderr,none": 0.014709405413413153, "acc_norm,none": 0.2644444444444444, "acc_norm_stderr,none": 0.014709405413413153}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2586138219478192, "acc_stderr,none": 0.004369780529824011, "acc_norm,none": 0.2605058753236407, "acc_norm_stderr,none": 0.00438013646854394}, "include_base_44_arabic": {"acc,none": 0.22644927536231885, "acc_stderr,none": 0.017798331996230314, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.01853412132130243, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.30120481927710846, "acc_stderr,none": 0.0357160923005348}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.0495695759225642}, "include_base_44_chinese": {"acc,none": 0.25504587155963304, "acc_stderr,none": 0.0187320050184817, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.352112676056338, "acc_stderr,none": 0.0570875692519562}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2577565632458234, "acc_stderr,none": 0.020917183976518608, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.5319148936170213, "acc_stderr,none": 0.07357064625618347}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.33093525179856115, "acc_stderr,none": 0.03842353360413949, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.2340036563071298, "acc_stderr,none": 0.018200630873415544, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.1875, "acc_stderr,none": 0.040045204719824215}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01873243086226229, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.31137724550898205, "acc_stderr,none": 0.03594016584565771}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.0372580781179416}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.1123666437438737}, "include_base_44_japanese": {"acc,none": 0.26746506986027946, "acc_stderr,none": 0.019829992942641272, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.25252525252525254, "acc_stderr,none": 0.043887175543277195}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.263681592039801, "acc_stderr,none": 0.031157150869355547}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.27860696517412936, "acc_stderr,none": 0.031700561834973086}, "include_base_44_korean": {"acc,none": 0.242, "acc_stderr,none": 0.01918206994119551, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.256, "acc_stderr,none": 0.027657108718204905}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.018783458185885605, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2463768115942029, "acc_stderr,none": 0.052254366311072324}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.36470588235294116, "acc_stderr,none": 0.05251932770420085}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191318}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.26545454545454544, "acc_stderr,none": 0.018812569040743833, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.02658743248726847}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.284, "acc_stderr,none": 0.02857695873043741}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018516277478674566, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233137}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911678}, "include_base_44_turkish": {"acc,none": 0.28832116788321166, "acc_stderr,none": 0.019335157990420678, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683228}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331346}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5239390642002176, "acc_stderr,none": 0.01165244562107926, "acc_norm,none": 0.499455930359086, "acc_norm_stderr,none": 0.011665817258899173}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177155}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3265060240963855, "acc_stderr,none": 0.009399397335962111}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177133}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.00942878910928983}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.00946022348499647}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.00948799273220153}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.009493454925438252}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.00948525020851688}} +{"created_at": "2025-06-25T23:44:19.238794", "global_step": 10500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.197098976109215, "acc_stderr,none": 0.011625047669880624, "acc_norm,none": 0.2645051194539249, "acc_norm_stderr,none": 0.012889272949313368}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26641414141414144, "acc_stderr,none": 0.009071357971078683, "acc_norm,none": 0.2668350168350168, "acc_norm_stderr,none": 0.009075915859267267}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651682, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651682}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458148, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458148}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884533, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884533}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884499, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884499}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030491, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030491}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198704, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198704}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458123, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458123}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.01393933491045812, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.01393933491045812}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2599083847839076, "acc_stderr,none": 0.004376877619234113, "acc_norm,none": 0.25791674965146383, "acc_norm_stderr,none": 0.004365938407209601}, "include_base_44_arabic": {"acc,none": 0.21195652173913043, "acc_stderr,none": 0.017450688370571964, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.1951219512195122, "acc_stderr,none": 0.04403272848041175}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.039906571509931855}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.04117581097845101}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386237}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.2864963503649635, "acc_stderr,none": 0.019319704992530596, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.03629335329947859}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.018257497395695214, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348948}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2458233890214797, "acc_stderr,none": 0.021111179988702867, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2978723404255319, "acc_stderr,none": 0.06742861107915606}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.2702702702702703, "acc_stderr,none": 0.05197789984508372}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.33093525179856115, "acc_stderr,none": 0.03779345144233894, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6956521739130435, "acc_stderr,none": 0.09810018692482896}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.24314442413162707, "acc_stderr,none": 0.01838272096327221, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01885392883048743, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.07201440432144052}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.035125586477990835}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.03253989433108519}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.2654690618762475, "acc_stderr,none": 0.01977662567439727, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.25252525252525254, "acc_stderr,none": 0.043887175543277195}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.23, "acc_stderr,none": 0.01885263076461715, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.22, "acc_stderr,none": 0.02625179282460584}, "include_base_44_russian": {"acc,none": 0.26268115942028986, "acc_stderr,none": 0.018657087122955083, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2898550724637681, "acc_stderr,none": 0.05501859437429688}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.17857142857142858, "acc_stderr,none": 0.07370704611510596}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.3764705882352941, "acc_stderr,none": 0.052863310306265295}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2898550724637681, "acc_stderr,none": 0.05501859437429687}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018267063370701327, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073528}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.28284671532846717, "acc_stderr,none": 0.019189610708418278, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237651}, "piqa": {"alias": "piqa", "acc,none": 0.5206746463547334, "acc_stderr,none": 0.011655846995729703, "acc_norm,none": 0.5016322089227421, "acc_norm_stderr,none": 0.011665762007194873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3273092369477912, "acc_stderr,none": 0.00940533815661493}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996469}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3301204819277108, "acc_stderr,none": 0.009425884992430727}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617617}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3317269076305221, "acc_stderr,none": 0.009437454900329122}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757712}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3196787148594378, "acc_stderr,none": 0.009347628444867175}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3289156626506024, "acc_stderr,none": 0.009417125981806726}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.342570281124498, "acc_stderr,none": 0.009512333319470385}} +{"created_at": "2025-06-26T00:08:40.419672", "global_step": 11000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20392491467576793, "acc_stderr,none": 0.011774262478702257, "acc_norm,none": 0.2568259385665529, "acc_norm_stderr,none": 0.0127669237941168}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.265993265993266, "acc_stderr,none": 0.009066789565615692, "acc_norm,none": 0.2697811447811448, "acc_norm_stderr,none": 0.009107527914671064}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2811111111111111, "acc_stderr,none": 0.014993051943690182, "acc_norm,none": 0.2811111111111111, "acc_norm_stderr,none": 0.014993051943690182}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.014198634809308183, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.014198634809308183}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391958, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391958}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579395, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579395}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030494, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030494}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.014689553047342515, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.014689553047342515}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132646, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132646}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.21666666666666667, "acc_stderr,none": 0.01374008783070017, "acc_norm,none": 0.21666666666666667, "acc_norm_stderr,none": 0.01374008783070017}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030494, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030494}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25692093208524197, "acc_stderr,none": 0.004360424536145123, "acc_norm,none": 0.2610037841067516, "acc_norm_stderr,none": 0.004382844128643436}, "include_base_44_arabic": {"acc,none": 0.21557971014492755, "acc_stderr,none": 0.017427645296376887, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.042857142857142844}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.13414634146341464, "acc_stderr,none": 0.037867719817024706}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.03850512095536383}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2, "acc_stderr,none": 0.03922322702763677}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3047619047619048, "acc_stderr,none": 0.0451367671816831}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663815}, "include_base_44_bengali": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.018514813201136245, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071854}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560375}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.01858757677894914, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.0487901635934895}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2577565632458234, "acc_stderr,none": 0.021332545955047924, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.23308270676691728, "acc_stderr,none": 0.025972057450276594}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3617021276595745, "acc_stderr,none": 0.07084485475872632}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.28378378378378377, "acc_stderr,none": 0.05276603149821338}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.036667388459489275, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6956521739130435, "acc_stderr,none": 0.09810018692482896}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_hindi": {"acc,none": 0.2376599634369287, "acc_stderr,none": 0.0182549406677916, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.018955985156037563, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.0372580781179416}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_japanese": {"acc,none": 0.24750499001996007, "acc_stderr,none": 0.019314095715962036, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.04199605255658083}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2736318407960199, "acc_stderr,none": 0.03152439186555401}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.23383084577114427, "acc_stderr,none": 0.029929415408348398}, "include_base_44_korean": {"acc,none": 0.232, "acc_stderr,none": 0.01891175873560934, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.224, "acc_stderr,none": 0.02642136168734791}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.018862992245791285, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.17391304347826086, "acc_stderr,none": 0.04596470781486299}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.30434782608695654, "acc_stderr,none": 0.0557990389495433}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.0541699276519132}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.35294117647058826, "acc_stderr,none": 0.0521414859075246}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191319}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.24468085106382978, "acc_stderr,none": 0.044578326884394524}, "include_base_44_spanish": {"acc,none": 0.27090909090909093, "acc_stderr,none": 0.018926219860336543, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.232, "acc_stderr,none": 0.026750070374865164}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2755474452554745, "acc_stderr,none": 0.019095894323760333, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031024}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.23036649214659685, "acc_stderr,none": 0.03054744122652054}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_turkish": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.018875486243314074, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594689}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "piqa": {"alias": "piqa", "acc,none": 0.5223068552774756, "acc_stderr,none": 0.011654208652596471, "acc_norm,none": 0.5021762785636561, "acc_norm_stderr,none": 0.011665713661738873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870672}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274443}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337342}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512699}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956471}} +{"created_at": "2025-06-26T19:27:22.424140", "global_step": 11500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20392491467576793, "acc_stderr,none": 0.011774262478702256, "acc_norm,none": 0.24658703071672355, "acc_norm_stderr,none": 0.01259572626879013}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26641414141414144, "acc_stderr,none": 0.009071357971078685, "acc_norm,none": 0.2689393939393939, "acc_norm_stderr,none": 0.00909854809300917}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699816, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699816}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198745, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198745}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030493, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030493}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.0141986348093082, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.0141986348093082}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.26, "acc_stderr,none": 0.014629271097998376, "acc_norm,none": 0.26, "acc_norm_stderr,none": 0.014629271097998376}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25871340370444135, "acc_stderr,none": 0.004370328224831784, "acc_norm,none": 0.2604062935670185, "acc_norm_stderr,none": 0.004379594059141039}, "include_base_44_arabic": {"acc,none": 0.2210144927536232, "acc_stderr,none": 0.017650477575789053, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.04176466758604901}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04429811949614585}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.2718978102189781, "acc_stderr,none": 0.018902081830111907, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3614457831325301, "acc_stderr,none": 0.037400593820293204}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073529}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_chinese": {"acc,none": 0.25137614678899084, "acc_stderr,none": 0.018641749804857444, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.2376599634369287, "acc_stderr,none": 0.018241074072633454, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.03823595564509367}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.01875615464189523, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.035747326542025434}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.0365772070654091}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.25948103792415167, "acc_stderr,none": 0.019641788698334647, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.26262626262626265, "acc_stderr,none": 0.04445287676983945}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.263681592039801, "acc_stderr,none": 0.03115715086935554}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.272, "acc_stderr,none": 0.019920323217971003, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.292, "acc_stderr,none": 0.02881432040220565}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031352}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.018907175587108607, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2513089005235602, "acc_stderr,none": 0.03146867590755503}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5233949945593036, "acc_stderr,none": 0.011653047155927788, "acc_norm,none": 0.4961915125136017, "acc_norm_stderr,none": 0.011665485744746797}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3421686746987952, "acc_stderr,none": 0.009509659143015634}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3317269076305221, "acc_stderr,none": 0.009437454900329123}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274452}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3281124497991968, "acc_stderr,none": 0.009411247685593385}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293532}} +{"created_at": "2025-06-26T19:54:11.674954", "global_step": 12000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.2098976109215017, "acc_stderr,none": 0.01190054874804744, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.012653835621466646}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2689393939393939, "acc_stderr,none": 0.009098548093009175, "acc_norm,none": 0.25252525252525254, "acc_norm_stderr,none": 0.008914948991495702}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884506, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884506}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2591117307309301, "acc_stderr,none": 0.0043725160601647455, "acc_norm,none": 0.2650866361282613, "acc_norm_stderr,none": 0.004404772735765975}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.01773798469050555, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01939486736491474, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.036471685236832294}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.01820778411435149, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.24083769633507854, "acc_stderr,none": 0.031020782537814432}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5272034820457019, "acc_stderr,none": 0.01164854526242902, "acc_norm,none": 0.5021762785636561, "acc_norm_stderr,none": 0.011665713661738873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516873}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3317269076305221, "acc_stderr,none": 0.009437454900329122}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870672}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289834}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3437751004016064, "acc_stderr,none": 0.009520310502882934}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.00944890091461761}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3257028112449799, "acc_stderr,none": 0.009393425164263997}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3381526104417671, "acc_stderr,none": 0.009482500057981028}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358223}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3465863453815261, "acc_stderr,none": 0.009538660220458992}} +{"created_at": "2025-06-26T20:46:55.068767", "global_step": 13000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19539249146757678, "acc_stderr,none": 0.01158690718995291, "acc_norm,none": 0.24914675767918087, "acc_norm_stderr,none": 0.012639407111926433}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2680976430976431, "acc_stderr,none": 0.009089526578213696, "acc_norm,none": 0.26430976430976433, "acc_norm_stderr,none": 0.009048410451863012}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.24666666666666667, "acc_stderr,none": 0.014377023375409368, "acc_norm,none": 0.24666666666666667, "acc_norm_stderr,none": 0.014377023375409368}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.01398772152368794, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.01398772152368794}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356804, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356804}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888453, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888453}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25801633140808605, "acc_stderr,none": 0.004366488167386392, "acc_norm,none": 0.25941047600079664, "acc_norm_stderr,none": 0.004374153847826759}, "include_base_44_arabic": {"acc,none": 0.21739130434782608, "acc_stderr,none": 0.017508473714101164, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.041175810978451}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428596}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.03431317581537582}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.28284671532846717, "acc_stderr,none": 0.01925860843685212, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683229}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898186}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018504249716979615, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.39436619718309857, "acc_stderr,none": 0.058412510854444266}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953275}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.31654676258992803, "acc_stderr,none": 0.03776738815260182, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.25594149908592323, "acc_stderr,none": 0.018687850038733054, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.22916666666666666, "acc_stderr,none": 0.04312153028672574}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348947}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018671255573601643, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19760479041916168, "acc_stderr,none": 0.030905719167240605}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2654690618762475, "acc_stderr,none": 0.01978542332360139, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.26262626262626265, "acc_stderr,none": 0.04445287676983945}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2736318407960199, "acc_stderr,none": 0.031524391865554016}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.25870646766169153, "acc_stderr,none": 0.030965903123573026}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2545454545454545, "acc_stderr,none": 0.018549822211975583, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.216, "acc_stderr,none": 0.02607865766373273}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.276, "acc_stderr,none": 0.028328537274211352}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018106520918387454, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.19879518072289157, "acc_stderr,none": 0.03106939026078942}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.21465968586387435, "acc_stderr,none": 0.029787042849701996}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.2937956204379562, "acc_stderr,none": 0.019446934104893485, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.06998542122237651}, "piqa": {"alias": "piqa", "acc,none": 0.5179542981501633, "acc_stderr,none": 0.011658300623287153, "acc_norm,none": 0.4940152339499456, "acc_norm_stderr,none": 0.011664988455853323}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3449799196787149, "acc_stderr,none": 0.009528219800053311}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870672}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.009443193365903341}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.329718875502008, "acc_stderr,none": 0.009422973103706463}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3449799196787149, "acc_stderr,none": 0.009528219800053313}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667055}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}} +{"created_at": "2025-06-26T21:13:46.758089", "global_step": 13500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20136518771331058, "acc_stderr,none": 0.011718927477444269, "acc_norm,none": 0.25170648464163825, "acc_norm_stderr,none": 0.012682496334042963}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.265993265993266, "acc_stderr,none": 0.009066789565615692, "acc_norm,none": 0.2668350168350168, "acc_norm_stderr,none": 0.009075915859267264}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705699, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705699}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276202, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276202}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218176, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218176}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055175, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055175}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132655, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132655}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2592113124875523, "acc_stderr,none": 0.004373062283376512, "acc_norm,none": 0.25941047600079664, "acc_norm_stderr,none": 0.004374153847826759}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.017692290186962837, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.04176466758604904}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "include_base_44_bengali": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.018922959767677546, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.3237410071942446, "acc_stderr,none": 0.03810450206175078, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.6521739130434783, "acc_stderr,none": 0.10154334054280735}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.26373626373626374, "acc_stderr,none": 0.04644942852497396}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.25594149908592323, "acc_stderr,none": 0.01869444244723015, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115033}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.23958333333333334, "acc_stderr,none": 0.043791751172569386}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133513}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_italian": {"acc,none": 0.2572992700729927, "acc_stderr,none": 0.018668571387726075, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.03657720706540911}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20359281437125748, "acc_stderr,none": 0.031253219622833416}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2654690618762475, "acc_stderr,none": 0.019774853452111045, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.25870646766169153, "acc_stderr,none": 0.03096590312357302}, "include_base_44_korean": {"acc,none": 0.23, "acc_stderr,none": 0.018857742649408614, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.232, "acc_stderr,none": 0.02675007037486516}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.25364963503649635, "acc_stderr,none": 0.018600893459229006, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.21465968586387435, "acc_stderr,none": 0.029787042849701982}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.01938786453909353, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5250272034820457, "acc_stderr,none": 0.011651200967623709, "acc_norm,none": 0.4972796517954298, "acc_norm_stderr,none": 0.011665651503000729}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3409638554216867, "acc_stderr,none": 0.009501591178361543}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3204819277108434, "acc_stderr,none": 0.009353837689339908}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240636}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101968}} +{"created_at": "2025-06-26T21:40:27.921350", "global_step": 14000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20648464163822525, "acc_stderr,none": 0.011828865619002316, "acc_norm,none": 0.25170648464163825, "acc_norm_stderr,none": 0.012682496334042965}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26515151515151514, "acc_stderr,none": 0.009057621139172621, "acc_norm,none": 0.26136363636363635, "acc_norm_stderr,none": 0.009015838366608193}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2833333333333333, "acc_stderr,none": 0.015028913755536897, "acc_norm,none": 0.2833333333333333, "acc_norm_stderr,none": 0.015028913755536897}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380039, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380039}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030491, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030491}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578942, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578942}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.27555555555555555, "acc_stderr,none": 0.014901407215241896, "acc_norm,none": 0.27555555555555555, "acc_norm_stderr,none": 0.014901407215241896}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705704, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705704}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168495, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168495}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683064, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683064}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132635, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132635}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.26, "acc_stderr,none": 0.014629271097998376, "acc_norm,none": 0.26, "acc_norm_stderr,none": 0.014629271097998376}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.27, "acc_stderr,none": 0.014806876915962114, "acc_norm,none": 0.27, "acc_norm_stderr,none": 0.014806876915962114}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.014748750651470893, "acc_norm,none": 0.26666666666666666, "acc_norm_stderr,none": 0.014748750651470893}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2591117307309301, "acc_stderr,none": 0.004372516060164747, "acc_norm,none": 0.26289583748257317, "acc_norm_stderr,none": 0.004393066760916823}, "include_base_44_arabic": {"acc,none": 0.2246376811594203, "acc_stderr,none": 0.017630248846994472, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.039906571509931855}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2926829268292683, "acc_stderr,none": 0.050554900016741525}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.13333333333333333, "acc_stderr,none": 0.033333333333333305}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.19047619047619047, "acc_stderr,none": 0.038505120955363834}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3238095238095238, "acc_stderr,none": 0.04588414718067475}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.2755474452554745, "acc_stderr,none": 0.019105223807520604, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "include_base_44_chinese": {"acc,none": 0.25137614678899084, "acc_stderr,none": 0.018596046139621257, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.056538877391335146}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.1839080459770115, "acc_stderr,none": 0.04177540678018985}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_french": {"acc,none": 0.23627684964200477, "acc_stderr,none": 0.02064873224907067, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22180451127819548, "acc_stderr,none": 0.025521524031416658}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.21621621621621623, "acc_stderr,none": 0.04818155789706758}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_german": {"acc,none": 0.3381294964028777, "acc_stderr,none": 0.03975099612733313, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.5217391304347826, "acc_stderr,none": 0.10649955403405122}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778438}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.018224115506132697, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.2708333333333333, "acc_stderr,none": 0.045593471241867974}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.047554769059532744}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.14084507042253522, "acc_stderr,none": 0.04157742116654289}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.053764141713832536}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390467}, "include_base_44_italian": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.01804356386028789, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.05714285714285714, "acc_stderr,none": 0.03980745977252778}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25161290322580643, "acc_stderr,none": 0.034967874881680024}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.19161676646706588, "acc_stderr,none": 0.030547196475366606}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.25349301397205587, "acc_stderr,none": 0.019493283530441777, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.25252525252525254, "acc_stderr,none": 0.043887175543277195}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296014}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296007}, "include_base_44_korean": {"acc,none": 0.268, "acc_stderr,none": 0.019834657915337835, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031352}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.284, "acc_stderr,none": 0.028576958730437398}, "include_base_44_russian": {"acc,none": 0.27898550724637683, "acc_stderr,none": 0.01909233805592185, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191319}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.056514087837646526}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.25882352941176473, "acc_stderr,none": 0.04778846120374094}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.048768771474726594}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.3191489361702128, "acc_stderr,none": 0.04833717738272893}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018874420686308634, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.296, "acc_stderr,none": 0.028928939388379638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_telugu": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.01857933655745623, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.23036649214659685, "acc_stderr,none": 0.030547441226520547}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511115}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01864879176622963, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.03680783690727581}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.18674698795180722, "acc_stderr,none": 0.030338749144500597}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.515778019586507, "acc_stderr,none": 0.011660014400426176, "acc_norm,none": 0.49510337323177367, "acc_norm_stderr,none": 0.011665264730078145}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617607}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3144578313253012, "acc_stderr,none": 0.009306488524868907}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996469}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3405622489959839, "acc_stderr,none": 0.009498886690274447}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.00949345492543825}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.009414190734131758}} +{"created_at": "2025-06-26T22:08:25.006774", "global_step": 14500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.197098976109215, "acc_stderr,none": 0.011625047669880621, "acc_norm,none": 0.2508532423208191, "acc_norm_stderr,none": 0.01266819862131543}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26304713804713803, "acc_stderr,none": 0.009034514898865826, "acc_norm,none": 0.26557239057239057, "acc_norm_stderr,none": 0.009062210626971845}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.27666666666666667, "acc_stderr,none": 0.014919965280947372, "acc_norm,none": 0.27666666666666667, "acc_norm_stderr,none": 0.014919965280947372}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642552, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642552}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2577777777777778, "acc_stderr,none": 0.014588474089651653, "acc_norm,none": 0.2577777777777778, "acc_norm_stderr,none": 0.014588474089651653}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217907, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217907}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.27, "acc_stderr,none": 0.014806876915962114, "acc_norm,none": 0.27, "acc_norm_stderr,none": 0.014806876915962114}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683079, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683079}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2911111111111111, "acc_stderr,none": 0.015150906906440088, "acc_norm,none": 0.2911111111111111, "acc_norm_stderr,none": 0.015150906906440088}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356807, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356807}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.24222222222222223, "acc_stderr,none": 0.014288876375699817, "acc_norm,none": 0.24222222222222223, "acc_norm_stderr,none": 0.014288876375699817}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683079, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683079}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.28, "acc_stderr,none": 0.014974951276705704, "acc_norm,none": 0.28, "acc_norm_stderr,none": 0.014974951276705704}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380051, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380051}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217902, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217902}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2599083847839076, "acc_stderr,none": 0.004376877619234112, "acc_norm,none": 0.2633937462656841, "acc_norm_stderr,none": 0.004395739495688581}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.017591835610197584, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2, "acc_stderr,none": 0.03922322702763677}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2804878048780488, "acc_stderr,none": 0.049915285789685265}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.13333333333333333, "acc_stderr,none": 0.033333333333333305}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2, "acc_stderr,none": 0.039223227027636774}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3238095238095238, "acc_stderr,none": 0.045884147180674746}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.25, "acc_stderr,none": 0.01854153378413327, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.0610118757258932}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018407536446484705, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.36619718309859156, "acc_stderr,none": 0.05758184314388001}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921428}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.19540229885057472, "acc_stderr,none": 0.04275678110973872}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_french": {"acc,none": 0.26014319809069214, "acc_stderr,none": 0.021470906013381093, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.24812030075187969, "acc_stderr,none": 0.026532757760263436}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3404255319148936, "acc_stderr,none": 0.06986570800554745}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.28378378378378377, "acc_stderr,none": 0.05276603149821338}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_german": {"acc,none": 0.33093525179856115, "acc_stderr,none": 0.040255704489961144, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.09361833424764437}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.34065934065934067, "acc_stderr,none": 0.049956709512768704}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_hindi": {"acc,none": 0.2449725776965265, "acc_stderr,none": 0.0184423159079951, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.28169014084507044, "acc_stderr,none": 0.05376414171383254}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.25, "acc_stderr,none": 0.04442616583193193}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.05527159681788331}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909662}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01886205868646362, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.035125586477990835}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.27741935483870966, "acc_stderr,none": 0.03607872492487903}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23353293413173654, "acc_stderr,none": 0.03283724952964297}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.249500998003992, "acc_stderr,none": 0.019379840623224263, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2727272727272727, "acc_stderr,none": 0.04498833266412666}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296018}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.23383084577114427, "acc_stderr,none": 0.029929415408348377}, "include_base_44_korean": {"acc,none": 0.266, "acc_stderr,none": 0.01979046868071624, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031352}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.28, "acc_stderr,none": 0.02845414827783233}, "include_base_44_russian": {"acc,none": 0.3007246376811594, "acc_stderr,none": 0.01946163447171306, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.058764812485270074}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.34782608695652173, "acc_stderr,none": 0.05775749253522359}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.10714285714285714, "acc_stderr,none": 0.05952380952380953}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.27058823529411763, "acc_stderr,none": 0.04847314453023652}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.0541699276519132}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.3191489361702128, "acc_stderr,none": 0.04833717738272893}, "include_base_44_spanish": {"acc,none": 0.26, "acc_stderr,none": 0.01871140160748388, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.224, "acc_stderr,none": 0.026421361687347905}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.292, "acc_stderr,none": 0.028814320402205638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_telugu": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.01856935441889728, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.29518072289156627, "acc_stderr,none": 0.03550920185689629}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01835058971086986, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5261153427638737, "acc_stderr,none": 0.011649900854263422, "acc_norm,none": 0.5087051142546246, "acc_norm_stderr,none": 0.011664055982032843}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3453815261044177, "acc_stderr,none": 0.009530841175865183}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3196787148594378, "acc_stderr,none": 0.009347628444867175}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778591}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177133}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956477}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.321285140562249, "acc_stderr,none": 0.0093600151224269}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463312}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289825}} +{"created_at": "2025-06-26T22:33:26.102350", "global_step": 15000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20136518771331058, "acc_stderr,none": 0.011718927477444267, "acc_norm,none": 0.24658703071672355, "acc_norm_stderr,none": 0.012595726268790132}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2638888888888889, "acc_stderr,none": 0.009043789220055148, "acc_norm,none": 0.26557239057239057, "acc_norm_stderr,none": 0.009062210626971845}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683087, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683087}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.28888888888888886, "acc_stderr,none": 0.015116606414982338, "acc_norm,none": 0.28888888888888886, "acc_norm_stderr,none": 0.015116606414982338}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632397, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632397}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632428, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632428}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2677777777777778, "acc_stderr,none": 0.014768244481214544, "acc_norm,none": 0.2677777777777778, "acc_norm_stderr,none": 0.014768244481214544}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884525, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884525}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.014748750651470898, "acc_norm,none": 0.26666666666666666, "acc_norm_stderr,none": 0.014748750651470898}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.27666666666666667, "acc_stderr,none": 0.014919965280947384, "acc_norm,none": 0.27666666666666667, "acc_norm_stderr,none": 0.014919965280947384}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.27444444444444444, "acc_stderr,none": 0.01488273373639156, "acc_norm,none": 0.27444444444444444, "acc_norm_stderr,none": 0.01488273373639156}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.014689553047342534, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.014689553047342534}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254276, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254276}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2811111111111111, "acc_stderr,none": 0.01499305194369019, "acc_norm,none": 0.2811111111111111, "acc_norm_stderr,none": 0.01499305194369019}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2788888888888889, "acc_stderr,none": 0.014956736888683087, "acc_norm,none": 0.2788888888888889, "acc_norm_stderr,none": 0.014956736888683087}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27444444444444444, "acc_stderr,none": 0.014882733736391575, "acc_norm,none": 0.27444444444444444, "acc_norm_stderr,none": 0.014882733736391575}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336086, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336086}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2586138219478192, "acc_stderr,none": 0.004369780529824011, "acc_norm,none": 0.25941047600079664, "acc_norm_stderr,none": 0.004374153847826759}, "include_base_44_arabic": {"acc,none": 0.2210144927536232, "acc_stderr,none": 0.017549408179868175, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.03775026958386238}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.2926829268292683, "acc_stderr,none": 0.05055490001674152}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.03431317581537584}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2, "acc_stderr,none": 0.039223227027636774}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.3142857142857143, "acc_stderr,none": 0.045521571818039494}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "include_base_44_bengali": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.01832171580564021, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594687}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560372}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018487692392233034, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.36619718309859156, "acc_stderr,none": 0.05758184314388001}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418898}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.046225147349214284}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.375, "acc_stderr,none": 0.125}, "include_base_44_french": {"acc,none": 0.2720763723150358, "acc_stderr,none": 0.021819431123793104, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.2593984962406015, "acc_stderr,none": 0.026924837126657657}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2978723404255319, "acc_stderr,none": 0.06742861107915607}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.11180339887498948}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.32432432432432434, "acc_stderr,none": 0.05478951716752587}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_german": {"acc,none": 0.35251798561151076, "acc_stderr,none": 0.04081499260984099, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.09361833424764437}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.37362637362637363, "acc_stderr,none": 0.0509934316638677}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_hindi": {"acc,none": 0.22851919561243145, "acc_stderr,none": 0.018043316221757512, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.0487901635934895}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081345}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.018777195581503568, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.34285714285714286, "acc_stderr,none": 0.08140424227436861}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.25806451612903225, "acc_stderr,none": 0.03526036935484004}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851863}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.25149700598802394, "acc_stderr,none": 0.019393212126218518, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.2727272727272727, "acc_stderr,none": 0.04498833266412666}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.27860696517412936, "acc_stderr,none": 0.031700561834973086}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.21393034825870647, "acc_stderr,none": 0.028996909693328927}, "include_base_44_korean": {"acc,none": 0.266, "acc_stderr,none": 0.01979046868071624, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.252, "acc_stderr,none": 0.027513851933031352}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.28, "acc_stderr,none": 0.02845414827783233}, "include_base_44_russian": {"acc,none": 0.2971014492753623, "acc_stderr,none": 0.01946624698555657, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.05716619504750293}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.391304347826087, "acc_stderr,none": 0.05918381823737157}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.05651408783764653}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.17857142857142858, "acc_stderr,none": 0.07370704611510598}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.27058823529411763, "acc_stderr,none": 0.048473144530236524}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2898550724637681, "acc_stderr,none": 0.05501859437429688}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2318840579710145, "acc_stderr,none": 0.05117930441535768}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2978723404255319, "acc_stderr,none": 0.04742219538812545}, "include_base_44_spanish": {"acc,none": 0.2727272727272727, "acc_stderr,none": 0.018986649006416063, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239007}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.296, "acc_stderr,none": 0.028928939388379638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.018907796361025974, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.036643147772880864}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911678}, "include_base_44_turkish": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018374360028799646, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.34, "acc_stderr,none": 0.0676726816132972}, "piqa": {"alias": "piqa", "acc,none": 0.5168661588683352, "acc_stderr,none": 0.011659185184878913, "acc_norm,none": 0.5027203482045702, "acc_norm_stderr,none": 0.011665651503000734}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3261044176706827, "acc_stderr,none": 0.009396415172722666}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3240963855421687, "acc_stderr,none": 0.009381386527922647}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3514056224899598, "acc_stderr,none": 0.009569263079823963}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}} +{"created_at": "2025-06-26T23:00:15.899190", "global_step": 15500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19539249146757678, "acc_stderr,none": 0.01158690718995291, "acc_norm,none": 0.25170648464163825, "acc_norm_stderr,none": 0.012682496334042963}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2596801346801347, "acc_stderr,none": 0.008996990428562217, "acc_norm,none": 0.26725589225589225, "acc_norm_stderr,none": 0.009080463246017469}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.25333333333333335, "acc_stderr,none": 0.014505399844356818, "acc_norm,none": 0.25333333333333335, "acc_norm_stderr,none": 0.014505399844356818}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168467, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168467}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391956, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391956}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.014463114105170807, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.014463114105170807}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168493, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168493}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2544444444444444, "acc_stderr,none": 0.014526354751055178, "acc_norm,none": 0.2544444444444444, "acc_norm_stderr,none": 0.014526354751055178}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030496, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030496}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276207, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276207}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.2388888888888889, "acc_stderr,none": 0.014221393731276203, "acc_norm,none": 0.2388888888888889, "acc_norm_stderr,none": 0.014221393731276203}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2722222222222222, "acc_stderr,none": 0.01484503879443395, "acc_norm,none": 0.2722222222222222, "acc_norm_stderr,none": 0.01484503879443395}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391963, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391963}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2599083847839076, "acc_stderr,none": 0.004376877619234115, "acc_norm,none": 0.2635929097789285, "acc_norm_stderr,none": 0.004396806562351321}, "include_base_44_arabic": {"acc,none": 0.23369565217391305, "acc_stderr,none": 0.018055861201675377, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.1951219512195122, "acc_stderr,none": 0.04403272848041176}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.2761904761904762, "acc_stderr,none": 0.04384295586918882}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.20952380952380953, "acc_stderr,none": 0.03990657150993187}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.054883922035138706}, "include_base_44_bengali": {"acc,none": 0.24635036496350365, "acc_stderr,none": 0.018401284565571413, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663815}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018638485462900607, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.27586206896551724, "acc_stderr,none": 0.048195602891152295}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2458233890214797, "acc_stderr,none": 0.021137340310667076, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.24060150375939848, "acc_stderr,none": 0.026257966995706337}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.2765957446808511, "acc_stderr,none": 0.06595297051445341}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.050215421942054014}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.31654676258992803, "acc_stderr,none": 0.03982656055783362, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.09361833424764436}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.32967032967032966, "acc_stderr,none": 0.049552195085965874}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390465}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.28284671532846717, "acc_stderr,none": 0.019293441950608465, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.03443623453899477}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.32903225806451614, "acc_stderr,none": 0.037862535985883856}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2634730538922156, "acc_stderr,none": 0.034190730421806675}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.2654690618762475, "acc_stderr,none": 0.019760731109584595, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2935323383084577, "acc_stderr,none": 0.03220024104534205}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.24875621890547264, "acc_stderr,none": 0.030567675938916714}, "include_base_44_korean": {"acc,none": 0.238, "acc_stderr,none": 0.019035793804912167, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.268, "acc_stderr,none": 0.028068762382526695}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.208, "acc_stderr,none": 0.02572139890141639}, "include_base_44_russian": {"acc,none": 0.302536231884058, "acc_stderr,none": 0.019640136251231635, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.3188405797101449, "acc_stderr,none": 0.05651408783764653}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.36231884057971014, "acc_stderr,none": 0.05828983090868436}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.08694008849288351}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.3411764705882353, "acc_stderr,none": 0.05172904297361928}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191319}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2898550724637681, "acc_stderr,none": 0.05501859437429688}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.2765957446808511, "acc_stderr,none": 0.046384384968156625}, "include_base_44_spanish": {"acc,none": 0.2727272727272727, "acc_stderr,none": 0.018981737547083512, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953824}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.292, "acc_stderr,none": 0.028814320402205638}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.22992700729927007, "acc_stderr,none": 0.01802845227181331, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2094240837696335, "acc_stderr,none": 0.029519452721613795}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019422021083763757, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683227}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.06663945022680343}, "piqa": {"alias": "piqa", "acc,none": 0.529379760609358, "acc_stderr,none": 0.01164566756505086, "acc_norm,none": 0.5081610446137106, "acc_norm_stderr,none": 0.011664270112244222}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3526104417670683, "acc_stderr,none": 0.009576746271768752}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617607}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.009493454925438257}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.00946583861733735}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996469}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.009443193365903338}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3433734939759036, "acc_stderr,none": 0.009517658993060703}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287628}} +{"created_at": "2025-06-26T23:27:09.454366", "global_step": 16000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20563139931740615, "acc_stderr,none": 0.011810745260742574, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.012653835621466646}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2676767676767677, "acc_stderr,none": 0.009085000147099348, "acc_norm,none": 0.26430976430976433, "acc_norm_stderr,none": 0.009048410451863014}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132628, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132628}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030467, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030467}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.014266513886578952, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.014266513886578952}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2222222222222222, "acc_stderr,none": 0.013865695626579367, "acc_norm,none": 0.2222222222222222, "acc_norm_stderr,none": 0.013865695626579367}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030461, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030461}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110771, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110771}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.0138903842971987, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.0138903842971987}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581963, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581963}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132642, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132642}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132651, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132651}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2604062935670185, "acc_stderr,none": 0.004379594059141047, "acc_norm,none": 0.2583150766779526, "acc_norm_stderr,none": 0.004368135676213557}, "include_base_44_arabic": {"acc,none": 0.22644927536231885, "acc_stderr,none": 0.01781264863407592, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.23809523809523808, "acc_stderr,none": 0.041764667586049006}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428598}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.18095238095238095, "acc_stderr,none": 0.037750269583862364}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.01879018332562582, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.035294868015111135}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680588}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345136}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.018454001126441598, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_french": {"acc,none": 0.2553699284009546, "acc_stderr,none": 0.021198584539885237, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.425531914893617, "acc_stderr,none": 0.07289875413448858}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.2949640287769784, "acc_stderr,none": 0.03839718711502045, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405122}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.01881434708096304, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3161290322580645, "acc_stderr,none": 0.037467862178731784}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.16666666666666666, "acc_stderr,none": 0.11236664374387367}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.23, "acc_stderr,none": 0.018856038841793175, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.236, "acc_stderr,none": 0.026909337594953835}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.224, "acc_stderr,none": 0.02642136168734791}, "include_base_44_russian": {"acc,none": 0.2717391304347826, "acc_stderr,none": 0.01880668963538939, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.21739130434782608, "acc_stderr,none": 0.0500194557943937}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191318}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24635036496350365, "acc_stderr,none": 0.01846056405654356, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233135}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.225130890052356, "acc_stderr,none": 0.030300857413315146}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071857}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_turkish": {"acc,none": 0.28832116788321166, "acc_stderr,none": 0.01932888627389526, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683228}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.5223068552774756, "acc_stderr,none": 0.011654208652596471, "acc_norm,none": 0.5027203482045702, "acc_norm_stderr,none": 0.011665651503000737}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3389558232931727, "acc_stderr,none": 0.009487992732201527}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463625}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512704}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778584}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.334136546184739, "acc_stderr,none": 0.00945457760246363}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3365461847389558, "acc_stderr,none": 0.009471423054177131}} +{"created_at": "2025-06-26T23:53:40.437919", "global_step": 16500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20819112627986347, "acc_stderr,none": 0.011864866118448067, "acc_norm,none": 0.24744027303754265, "acc_norm_stderr,none": 0.012610352663292673}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2638888888888889, "acc_stderr,none": 0.009043789220055144, "acc_norm,none": 0.2609427609427609, "acc_norm_stderr,none": 0.009011142493235973}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218176, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218176}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945589, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945589}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.01393933491045812, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.01393933491045812}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25941047600079664, "acc_stderr,none": 0.004374153847826759, "acc_norm,none": 0.26239792869946227, "acc_norm_stderr,none": 0.004390386775400534}, "include_base_44_arabic": {"acc,none": 0.23369565217391305, "acc_stderr,none": 0.018000736674227705, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.1951219512195122, "acc_stderr,none": 0.04403272848041175}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.3047619047619048, "acc_stderr,none": 0.0451367671816831}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.03695642136439615}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.26094890510948904, "acc_stderr,none": 0.018821688469578022, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071857}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233134}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2553699284009546, "acc_stderr,none": 0.021198584539885237, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.425531914893617, "acc_stderr,none": 0.07289875413448858}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.03907965690081692, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405124}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.04761904761904759}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018829006450048022, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.0372580781179416}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.018091554033247782, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.225130890052356, "acc_stderr,none": 0.03030085741331515}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.0317555478662992}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.09797958971132713}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019292176829627593, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031022}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5272034820457019, "acc_stderr,none": 0.01164854526242902, "acc_norm,none": 0.501088139281828, "acc_norm_stderr,none": 0.011665796539540878}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.009506977398287628}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463302}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512706}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101964}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.00946583861733735}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.35180722891566263, "acc_stderr,none": 0.009571764897113625}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3244979919678715, "acc_stderr,none": 0.009384407992489767}} +{"created_at": "2025-06-27T00:21:56.882175", "global_step": 17000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20733788395904437, "acc_stderr,none": 0.011846905782971371, "acc_norm,none": 0.2551194539249147, "acc_norm_stderr,none": 0.012739038695202102}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26430976430976433, "acc_stderr,none": 0.009048410451863014, "acc_norm,none": 0.2638888888888889, "acc_norm_stderr,none": 0.009043789220055137}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458146, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458146}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030472, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030472}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687975, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687975}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687975, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687975}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391954, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391954}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683052, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683052}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2597092212706632, "acc_stderr,none": 0.00437578899121685, "acc_norm,none": 0.2625970922127066, "acc_norm_stderr,none": 0.004391459640545319}, "include_base_44_arabic": {"acc,none": 0.2318840579710145, "acc_stderr,none": 0.017944117828314055, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.18292682926829268, "acc_stderr,none": 0.04295628044484376}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.04285714285714284}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_bengali": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018872167190338976, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2891566265060241, "acc_stderr,none": 0.03529486801511116}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2553699284009546, "acc_stderr,none": 0.021249499218328364, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.03907965690081692, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405124}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.047619047619047596}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.01881981771869235, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3032258064516129, "acc_stderr,none": 0.03703980981843178}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.26811594202898553, "acc_stderr,none": 0.018703757572449183, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2753623188405797, "acc_stderr,none": 0.05416992765191318}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.018374454804188624, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233133}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.09165151389911681}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019405491825144058, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5239390642002176, "acc_stderr,none": 0.01165244562107926, "acc_norm,none": 0.5032644178454843, "acc_norm_stderr,none": 0.01166557553076037}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617617}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.327710843373494, "acc_stderr,none": 0.009408296828870675}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463623}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3357429718875502, "acc_stderr,none": 0.009465838617337349}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3401606425702811, "acc_stderr,none": 0.009496174608136407}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3317269076305221, "acc_stderr,none": 0.009437454900329115}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.0094316854614633}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358225}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3373493975903614, "acc_stderr,none": 0.009476976849778591}} +{"created_at": "2025-06-27T00:48:58.333498", "global_step": 17500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19880546075085323, "acc_stderr,none": 0.011662850198175543, "acc_norm,none": 0.24744027303754265, "acc_norm_stderr,none": 0.01261035266329267}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26641414141414144, "acc_stderr,none": 0.009071357971078685, "acc_norm,none": 0.26936026936026936, "acc_norm_stderr,none": 0.00910304320775699}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884509, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884509}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884537, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884537}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945584, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945584}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755672, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755672}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391956, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391956}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132648, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132648}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.26020713005377416, "acc_stderr,none": 0.004378508362084369, "acc_norm,none": 0.2614021111332404, "acc_norm_stderr,none": 0.004385004998923461}, "include_base_44_arabic": {"acc,none": 0.2210144927536232, "acc_stderr,none": 0.017609778050480352, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.14634146341463414, "acc_stderr,none": 0.03927202370241042}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.28102189781021897, "acc_stderr,none": 0.019271719857565255, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898185}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.02122572454377134, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.23308270676691728, "acc_stderr,none": 0.025972057450276594}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.07167347772513392}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.24324324324324326, "acc_stderr,none": 0.050215421942053994}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.28776978417266186, "acc_stderr,none": 0.038131024267939156, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405122}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.018904600559212635, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120039}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.30538922155688625, "acc_stderr,none": 0.03574732654202544}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.3096774193548387, "acc_stderr,none": 0.0372580781179416}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.23905109489051096, "acc_stderr,none": 0.01823050292016047, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2198952879581152, "acc_stderr,none": 0.030047449343709728}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.03484331592680589}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390465}, "include_base_44_turkish": {"acc,none": 0.291970802919708, "acc_stderr,none": 0.019446580804457603, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5168661588683352, "acc_stderr,none": 0.011659185184878915, "acc_norm,none": 0.499455930359086, "acc_norm_stderr,none": 0.011665817258899173}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463299}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.009414190734131751}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.009434574056101966}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3269076305220884, "acc_stderr,none": 0.009402371661447662}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463304}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3196787148594378, "acc_stderr,none": 0.009347628444867178}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939166}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.009443193365903345}} +{"created_at": "2025-06-27T01:13:37.201871", "global_step": 18000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19965870307167236, "acc_stderr,none": 0.01168162575688869, "acc_norm,none": 0.24658703071672355, "acc_norm_stderr,none": 0.01259572626879013}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2689393939393939, "acc_stderr,none": 0.00909854809300918, "acc_norm,none": 0.265993265993266, "acc_norm_stderr,none": 0.009066789565615694}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687945, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687945}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458153, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458153}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687973, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687973}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945584, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945584}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132653, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132653}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687966, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687966}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030493, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030493}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25941047600079664, "acc_stderr,none": 0.004374153847826759, "acc_norm,none": 0.26180043815972914, "acc_norm_stderr,none": 0.00438716120308796}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01767514227930449, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.040582599273365695}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.27007299270072993, "acc_stderr,none": 0.01894507336780089, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370518}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031023}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.05237229365663814}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.021175427759512762, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22556390977443608, "acc_stderr,none": 0.025674657674902242}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.40425531914893614, "acc_stderr,none": 0.07235674844413013}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.30935251798561153, "acc_stderr,none": 0.03898314836059536, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405124}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.2967032967032967, "acc_stderr,none": 0.04815143362682777}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.08164965809277261}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2591240875912409, "acc_stderr,none": 0.01872192692113196, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2934131736526946, "acc_stderr,none": 0.03534016139050468}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.20958083832335328, "acc_stderr,none": 0.03159006158827181}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.24817518248175183, "acc_stderr,none": 0.01850508189304122, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.033844291552331374}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2356020942408377, "acc_stderr,none": 0.030787364755364144}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064537}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390465}, "include_base_44_turkish": {"acc,none": 0.291970802919708, "acc_stderr,none": 0.019446580804457603, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.515233949945593, "acc_stderr,none": 0.011660408257153634, "acc_norm,none": 0.5065288356909684, "acc_norm_stderr,none": 0.01166482959521097}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996458}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240639}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.00941419073413175}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956473}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3349397590361446, "acc_stderr,none": 0.009460223484996465}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3313253012048193, "acc_stderr,none": 0.00943457405610196}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3429718875502008, "acc_stderr,none": 0.009514999934033463}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.00942878910928983}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3285140562248996, "acc_stderr,none": 0.009414190734131755}} +{"created_at": "2025-06-27T01:40:18.300652", "global_step": 18500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.20136518771331058, "acc_stderr,none": 0.011718927477444269, "acc_norm,none": 0.24488054607508533, "acc_norm_stderr,none": 0.012566273985131356}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2680976430976431, "acc_stderr,none": 0.009089526578213703, "acc_norm,none": 0.2647306397306397, "acc_norm_stderr,none": 0.009053021086173963}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884507, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884507}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458148, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458148}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884535, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884535}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.01415271607913265, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.01415271607913265}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2588129854610635, "acc_stderr,none": 0.004370875625258998, "acc_norm,none": 0.2619000199163513, "acc_norm_stderr,none": 0.00438769952585488}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01767514227930449, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.040582599273365695}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.2773722627737226, "acc_stderr,none": 0.019098982389354032, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3072289156626506, "acc_stderr,none": 0.03591566797824664}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.03629335329947859}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594687}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.25059665871121717, "acc_stderr,none": 0.021185910208552854, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3617021276595745, "acc_stderr,none": 0.07084485475872632}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.28776978417266186, "acc_stderr,none": 0.038602689498982774, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.391304347826087, "acc_stderr,none": 0.10405096111532161}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.27472527472527475, "acc_stderr,none": 0.04705213398778437}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018866191346031236, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.22262773722627738, "acc_stderr,none": 0.017825637245714242, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629921}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2198952879581152, "acc_stderr,none": 0.030047449343709728}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_turkish": {"acc,none": 0.28832116788321166, "acc_stderr,none": 0.019373705269132024, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031022}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553026}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5233949945593036, "acc_stderr,none": 0.011653047155927788, "acc_norm,none": 0.5097932535364527, "acc_norm_stderr,none": 0.011663586263283223}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3301204819277108, "acc_stderr,none": 0.009425884992430708}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3232931726907631, "acc_stderr,none": 0.009375319942199674}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667053}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667055}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358228}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3305220883534137, "acc_stderr,none": 0.009428789109289827}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667058}} +{"created_at": "2025-06-27T02:09:55.183169", "global_step": 19000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19880546075085323, "acc_stderr,none": 0.011662850198175537, "acc_norm,none": 0.24573378839590443, "acc_norm_stderr,none": 0.012581033453730113}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.2668350168350168, "acc_stderr,none": 0.009075915859267255, "acc_norm,none": 0.26515151515151514, "acc_norm_stderr,none": 0.009057621139172616}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132628, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132628}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458148, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458148}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030494, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030494}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218198, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218198}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030491, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030491}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132655, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132655}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884533, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884533}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.2592113124875523, "acc_stderr,none": 0.004373062283376512, "acc_norm,none": 0.25951005775741887, "acc_norm_stderr,none": 0.004374699189284861}, "include_base_44_arabic": {"acc,none": 0.22644927536231885, "acc_stderr,none": 0.01779214728409493, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.17073170731707318, "acc_stderr,none": 0.04180826184428597}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.22, "acc_stderr,none": 0.05917804336345138}, "include_base_44_bengali": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.018867262488688943, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530276}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589319}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.25059665871121717, "acc_stderr,none": 0.021185910208552854, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3617021276595745, "acc_stderr,none": 0.07084485475872632}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.2733812949640288, "acc_stderr,none": 0.037717834366849924, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.43478260869565216, "acc_stderr,none": 0.10568965974008647}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.24175824175824176, "acc_stderr,none": 0.04513082148355002}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.26277372262773724, "acc_stderr,none": 0.01881002094090936, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2155688622754491, "acc_stderr,none": 0.031916582528798025}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.21715328467153286, "acc_stderr,none": 0.017611410845039595, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.1686746987951807, "acc_stderr,none": 0.029152009627856544}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2198952879581152, "acc_stderr,none": 0.030047449343709728}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071856}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.29014598540145986, "acc_stderr,none": 0.019405491825144054, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3192771084337349, "acc_stderr,none": 0.036293353299478595}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.034605799075530255}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.5168661588683352, "acc_stderr,none": 0.011659185184878913, "acc_norm,none": 0.501088139281828, "acc_norm_stderr,none": 0.011665796539540878}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358232}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512706}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3220883534136546, "acc_stderr,none": 0.009366160807072053}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3321285140562249, "acc_stderr,none": 0.009440328001240637}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512708}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956478}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3309236947791165, "acc_stderr,none": 0.009431685461463302}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.009463034891512706}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3281124497991968, "acc_stderr,none": 0.009411247685593387}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617621}} +{"created_at": "2025-06-27T02:33:24.787933", "global_step": 19500, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.1945392491467577, "acc_stderr,none": 0.011567709174648727, "acc_norm,none": 0.24573378839590443, "acc_norm_stderr,none": 0.012581033453730113}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.265993265993266, "acc_stderr,none": 0.009066789565615692, "acc_norm,none": 0.26430976430976433, "acc_norm_stderr,none": 0.009048410451863014}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030468, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030468}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030468, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030468}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030494, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030494}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945585, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945585}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218197, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218197}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132656, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132656}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23333333333333334, "acc_stderr,none": 0.014106258477755675, "acc_norm,none": 0.23333333333333334, "acc_norm_stderr,none": 0.014106258477755675}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458118, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458118}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25871340370444135, "acc_stderr,none": 0.004370328224831787, "acc_norm,none": 0.26309500099581756, "acc_norm_stderr,none": 0.004394136724173005}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01767514227930449, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.040582599273365695}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.2791970802919708, "acc_stderr,none": 0.019172796768839057, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.036108050180310235}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.28313253012048195, "acc_stderr,none": 0.03507295431370519}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.18, "acc_stderr,none": 0.0548839220351387}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.2529832935560859, "acc_stderr,none": 0.02122061874473364, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3829787234042553, "acc_stderr,none": 0.07167347772513391}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.2805755395683453, "acc_stderr,none": 0.038070659451604184, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.43478260869565216, "acc_stderr,none": 0.10568965974008647}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2664233576642336, "acc_stderr,none": 0.01890943577662801, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2967741935483871, "acc_stderr,none": 0.03681290636819265}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.23357664233576642, "acc_stderr,none": 0.018131360120844424, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.2198952879581152, "acc_stderr,none": 0.030047449343709724}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.0916515138991168}, "include_base_44_turkish": {"acc,none": 0.28832116788321166, "acc_stderr,none": 0.019356049035526374, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031022}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.38, "acc_stderr,none": 0.06934092056863767}, "piqa": {"alias": "piqa", "acc,none": 0.514689880304679, "acc_stderr,none": 0.011660788281735487, "acc_norm,none": 0.5, "acc_norm_stderr,none": 0.011665824165343952}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.00946303489151271}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757708}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3180722891566265, "acc_stderr,none": 0.009335114267528848}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3329317269076305, "acc_stderr,none": 0.009446051001358226}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956477}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.3289156626506024, "acc_stderr,none": 0.009417125981806732}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617614}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3397590361445783, "acc_stderr,none": 0.009493454925438252}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.009448900914617609}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.342570281124498, "acc_stderr,none": 0.009512333319470377}} +{"created_at": "2025-06-27T03:00:29.834510", "global_step": 20000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.19795221843003413, "acc_stderr,none": 0.011643990971573405, "acc_norm,none": 0.24829351535836178, "acc_norm_stderr,none": 0.012624912868089762}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.26851851851851855, "acc_stderr,none": 0.009094042554994857, "acc_norm,none": 0.2638888888888889, "acc_norm_stderr,none": 0.009043789220055134}, "belebele_arb_Arab": {"alias": "belebele_arb_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_ben_Beng": {"alias": "belebele_ben_Beng", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_deu_Latn": {"alias": "belebele_deu_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888451, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888451}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.0140592566632182, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.0140592566632182}, "belebele_fra_Latn": {"alias": "belebele_fra_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_hin_Deva": {"alias": "belebele_hin_Deva", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884528, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884528}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884535, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884535}, "belebele_jpn_Jpan": {"alias": "belebele_jpn_Jpan", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884532, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884532}, "belebele_kor_Hang": {"alias": "belebele_kor_Hang", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_rus_Cyrl": {"alias": "belebele_rus_Cyrl", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884504, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884504}, "belebele_spa_Latn": {"alias": "belebele_spa_Latn", "acc,none": 0.23444444444444446, "acc_stderr,none": 0.014129554968110773, "acc_norm,none": 0.23444444444444446, "acc_norm_stderr,none": 0.014129554968110773}, "belebele_swh_Latn": {"alias": "belebele_swh_Latn", "acc,none": 0.23777777777777778, "acc_stderr,none": 0.0141986348093082, "acc_norm,none": 0.23777777777777778, "acc_norm_stderr,none": 0.0141986348093082}, "belebele_tel_Telu": {"alias": "belebele_tel_Telu", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884523, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884523}, "belebele_tha_Thai": {"alias": "belebele_tha_Thai", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214573, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214573}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.22777777777777777, "acc_stderr,none": 0.013987721523687971, "acc_norm,none": 0.22777777777777777, "acc_norm_stderr,none": 0.013987721523687971}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945592, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945592}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.25871340370444135, "acc_stderr,none": 0.004370328224831787, "acc_norm,none": 0.26289583748257317, "acc_norm_stderr,none": 0.004393066760916823}, "include_base_44_arabic": {"acc,none": 0.22282608695652173, "acc_stderr,none": 0.01767514227930449, "alias": "include_base_44_arabic"}, "include_base_44_arabic_few_shot_og_arts_humanities": {"alias": " - include_base_44_arabic_few_shot_og_arts_humanities", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.04232473532055043}, "include_base_44_arabic_few_shot_og_business_commerce": {"alias": " - include_base_44_arabic_few_shot_og_business_commerce", "acc,none": 0.15853658536585366, "acc_stderr,none": 0.040582599273365695}, "include_base_44_arabic_few_shot_og_driving_license": {"alias": " - include_base_44_arabic_few_shot_og_driving_license", "acc,none": 0.29523809523809524, "acc_stderr,none": 0.044729159560441434}, "include_base_44_arabic_few_shot_og_general_knowledge": {"alias": " - include_base_44_arabic_few_shot_og_general_knowledge", "acc,none": 0.24761904761904763, "acc_stderr,none": 0.0423247353205504}, "include_base_44_arabic_few_shot_og_social_science": {"alias": " - include_base_44_arabic_few_shot_og_social_science", "acc,none": 0.1619047619047619, "acc_stderr,none": 0.036121005232589175}, "include_base_44_arabic_few_shot_og_stem": {"alias": " - include_base_44_arabic_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "include_base_44_bengali": {"acc,none": 0.26824817518248173, "acc_stderr,none": 0.01892447609978878, "alias": "include_base_44_bengali"}, "include_base_44_bengali_few_shot_og_arts_humanities": {"alias": " - include_base_44_bengali_few_shot_og_arts_humanities", "acc,none": 0.3253012048192771, "acc_stderr,none": 0.03647168523683229}, "include_base_44_bengali_few_shot_og_general_knowledge": {"alias": " - include_base_44_bengali_few_shot_og_general_knowledge", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_bengali_few_shot_og_professional_certification": {"alias": " - include_base_44_bengali_few_shot_og_professional_certification", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_bengali_few_shot_og_stem": {"alias": " - include_base_44_bengali_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560375}, "include_base_44_chinese": {"acc,none": 0.24587155963302754, "acc_stderr,none": 0.018517539247711934, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.051995626882956975}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.19718309859154928, "acc_stderr,none": 0.04755476905953276}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french": {"acc,none": 0.25059665871121717, "acc_stderr,none": 0.021185910208552854, "alias": "include_base_44_french"}, "include_base_44_french_few_shot_og_arts_humanities": {"alias": " - include_base_44_french_few_shot_og_arts_humanities", "acc,none": 0.22932330827067668, "acc_stderr,none": 0.02582481829225878}, "include_base_44_french_few_shot_og_driving_license": {"alias": " - include_base_44_french_few_shot_og_driving_license", "acc,none": 0.3617021276595745, "acc_stderr,none": 0.07084485475872632}, "include_base_44_french_few_shot_og_health_oriented_education": {"alias": " - include_base_44_french_few_shot_og_health_oriented_education", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_french_few_shot_og_social_science": {"alias": " - include_base_44_french_few_shot_og_social_science", "acc,none": 0.25675675675675674, "acc_stderr,none": 0.051128728837659454}, "include_base_44_french_few_shot_og_stem": {"alias": " - include_base_44_french_few_shot_og_stem", "acc,none": 0.3125, "acc_stderr,none": 0.11967838846954226}, "include_base_44_german": {"acc,none": 0.28776978417266186, "acc_stderr,none": 0.038131024267939156, "alias": "include_base_44_german"}, "include_base_44_german_few_shot_og_driving_license": {"alias": " - include_base_44_german_few_shot_og_driving_license", "acc,none": 0.4782608695652174, "acc_stderr,none": 0.10649955403405124}, "include_base_44_german_few_shot_og_social_science": {"alias": " - include_base_44_german_few_shot_og_social_science", "acc,none": 0.25274725274725274, "acc_stderr,none": 0.045809518537328904}, "include_base_44_german_few_shot_og_stem": {"alias": " - include_base_44_german_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_hindi": {"acc,none": 0.23948811700182815, "acc_stderr,none": 0.01830198403573232, "alias": "include_base_44_hindi"}, "include_base_44_hindi_few_shot_og_applied_science": {"alias": " - include_base_44_hindi_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295698}, "include_base_44_hindi_few_shot_og_arts_humanities": {"alias": " - include_base_44_hindi_few_shot_og_arts_humanities", "acc,none": 0.17708333333333334, "acc_stderr,none": 0.03916561682056947}, "include_base_44_hindi_few_shot_og_driving_license": {"alias": " - include_base_44_hindi_few_shot_og_driving_license", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_hindi_few_shot_og_general_knowledge": {"alias": " - include_base_44_hindi_few_shot_og_general_knowledge", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_hindi_few_shot_og_health_oriented_education": {"alias": " - include_base_44_hindi_few_shot_og_health_oriented_education", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_hindi_few_shot_og_professional_certification": {"alias": " - include_base_44_hindi_few_shot_og_professional_certification", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_social_science": {"alias": " - include_base_44_hindi_few_shot_og_social_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_hindi_few_shot_og_stem": {"alias": " - include_base_44_hindi_few_shot_og_stem", "acc,none": 0.16, "acc_stderr,none": 0.0748331477354788}, "include_base_44_italian": {"acc,none": 0.2645985401459854, "acc_stderr,none": 0.018872641302432616, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.17142857142857143, "acc_stderr,none": 0.06463490595976273}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2903225806451613, "acc_stderr,none": 0.0365772070654091}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.032539894331085194}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_japanese": {"acc,none": 0.2634730538922156, "acc_stderr,none": 0.01972538101830729, "alias": "include_base_44_japanese"}, "include_base_44_japanese_few_shot_og_driving_license": {"alias": " - include_base_44_japanese_few_shot_og_driving_license", "acc,none": 0.24242424242424243, "acc_stderr,none": 0.04329004329004327}, "include_base_44_japanese_few_shot_og_medical_license": {"alias": " - include_base_44_japanese_few_shot_og_medical_license", "acc,none": 0.2835820895522388, "acc_stderr,none": 0.03187187537919797}, "include_base_44_japanese_few_shot_og_professional_certification": {"alias": " - include_base_44_japanese_few_shot_og_professional_certification", "acc,none": 0.2537313432835821, "acc_stderr,none": 0.030769444967296024}, "include_base_44_korean": {"acc,none": 0.234, "acc_stderr,none": 0.01896985560532149, "alias": "include_base_44_korean"}, "include_base_44_korean_few_shot_og_professional_certification": {"alias": " - include_base_44_korean_few_shot_og_professional_certification", "acc,none": 0.24, "acc_stderr,none": 0.027065293652239003}, "include_base_44_korean_few_shot_og_social_science": {"alias": " - include_base_44_korean_few_shot_og_social_science", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268487}, "include_base_44_russian": {"acc,none": 0.266304347826087, "acc_stderr,none": 0.018662425786691965, "alias": "include_base_44_russian"}, "include_base_44_russian_few_shot_og_applied_science": {"alias": " - include_base_44_russian_few_shot_og_applied_science", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.04876877147472662}, "include_base_44_russian_few_shot_og_arts_humanities": {"alias": " - include_base_44_russian_few_shot_og_arts_humanities", "acc,none": 0.2028985507246377, "acc_stderr,none": 0.0487687714747266}, "include_base_44_russian_few_shot_og_business_commerce": {"alias": " - include_base_44_russian_few_shot_og_business_commerce", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_driving_license": {"alias": " - include_base_44_russian_few_shot_og_driving_license", "acc,none": 0.21428571428571427, "acc_stderr,none": 0.07896725691322383}, "include_base_44_russian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_russian_few_shot_og_health_oriented_education", "acc,none": 0.38823529411764707, "acc_stderr,none": 0.05317409082203827}, "include_base_44_russian_few_shot_og_marine_license": {"alias": " - include_base_44_russian_few_shot_og_marine_license", "acc,none": 0.37681159420289856, "acc_stderr,none": 0.05876481248527006}, "include_base_44_russian_few_shot_og_social_science": {"alias": " - include_base_44_russian_few_shot_og_social_science", "acc,none": 0.2608695652173913, "acc_stderr,none": 0.05324977701702559}, "include_base_44_russian_few_shot_og_stem": {"alias": " - include_base_44_russian_few_shot_og_stem", "acc,none": 0.19148936170212766, "acc_stderr,none": 0.040801273706799875}, "include_base_44_spanish": {"acc,none": 0.2672727272727273, "acc_stderr,none": 0.018850411012442, "alias": "include_base_44_spanish"}, "include_base_44_spanish_few_shot_og_arts_humanities": {"alias": " - include_base_44_spanish_few_shot_og_arts_humanities", "acc,none": 0.228, "acc_stderr,none": 0.026587432487268466}, "include_base_44_spanish_few_shot_og_health_oriented_education": {"alias": " - include_base_44_spanish_few_shot_og_health_oriented_education", "acc,none": 0.32, "acc_stderr,none": 0.09521904571390466}, "include_base_44_spanish_few_shot_og_social_science": {"alias": " - include_base_44_spanish_few_shot_og_social_science", "acc,none": 0.288, "acc_stderr,none": 0.028697004587398222}, "include_base_44_spanish_few_shot_og_stem": {"alias": " - include_base_44_spanish_few_shot_og_stem", "acc,none": 0.4, "acc_stderr,none": 0.1}, "include_base_44_telugu": {"acc,none": 0.22992700729927007, "acc_stderr,none": 0.018039988814061676, "alias": "include_base_44_telugu"}, "include_base_44_telugu_few_shot_og_applied_science": {"alias": " - include_base_44_telugu_few_shot_og_applied_science", "acc,none": 0.23493975903614459, "acc_stderr,none": 0.03300533186128922}, "include_base_44_telugu_few_shot_og_arts_humanities": {"alias": " - include_base_44_telugu_few_shot_og_arts_humanities", "acc,none": 0.225130890052356, "acc_stderr,none": 0.03030085741331515}, "include_base_44_telugu_few_shot_og_social_science": {"alias": " - include_base_44_telugu_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_telugu_few_shot_og_stem": {"alias": " - include_base_44_telugu_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.08717797887081347}, "include_base_44_turkish": {"acc,none": 0.2846715328467153, "acc_stderr,none": 0.019291129356210843, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.3132530120481928, "acc_stderr,none": 0.03610805018031022}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944966}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233134}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.36, "acc_stderr,none": 0.06857142857142856}, "piqa": {"alias": "piqa", "acc,none": 0.515233949945593, "acc_stderr,none": 0.011660408257153638, "acc_norm,none": 0.501088139281828, "acc_norm_stderr,none": 0.011665796539540873}, "xnli_ar": {"alias": "xnli_ar", "acc,none": 0.3353413654618474, "acc_stderr,none": 0.00946303489151271}, "xnli_de": {"alias": "xnli_de", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956477}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.3244979919678715, "acc_stderr,none": 0.009384407992489761}, "xnli_es": {"alias": "xnli_es", "acc,none": 0.3325301204819277, "acc_stderr,none": 0.00944319336590334}, "xnli_fr": {"alias": "xnli_fr", "acc,none": 0.3345381526104418, "acc_stderr,none": 0.009457404390939167}, "xnli_hi": {"alias": "xnli_hi", "acc,none": 0.334136546184739, "acc_stderr,none": 0.009454577602463621}, "xnli_ru": {"alias": "xnli_ru", "acc,none": 0.329718875502008, "acc_stderr,none": 0.00942297310370646}, "xnli_sw": {"alias": "xnli_sw", "acc,none": 0.3240963855421687, "acc_stderr,none": 0.009381386527922643}, "xnli_th": {"alias": "xnli_th", "acc,none": 0.3377510040160643, "acc_stderr,none": 0.009479742273956473}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.3337349397590361, "acc_stderr,none": 0.009451743112667057}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}}