Upload google-gemma-2-2b/metrics.eval.jsonl with huggingface_hub
Browse files
google-gemma-2-2b/metrics.eval.jsonl
CHANGED
@@ -41,3 +41,9 @@
|
|
41 |
{"created_at": "2025-08-16T11:47:10.715607", "global_step": 84000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34897610921501704, "acc_stderr,none": 0.013928933461382501, "acc_norm,none": 0.38054607508532423, "acc_norm_stderr,none": 0.01418827771234982}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7117003367003367, "acc_stderr,none": 0.009294774252029623, "acc_norm,none": 0.6452020202020202, "acc_norm_stderr,none": 0.009817629113069694}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642536, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642536}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030465, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030465}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.014649486385262122, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.014649486385262122}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.01456789134238004, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.01456789134238004}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4296952798247361, "acc_stderr,none": 0.004940208641372078, "acc_norm,none": 0.5622385978888668, "acc_norm_stderr,none": 0.004950973231188741}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018641749804857447, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01840716058598386, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.21935483870967742, "acc_stderr,none": 0.03334571892189038}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799084}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018163913425664667, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073528}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.010382763786247378, "acc_norm,none": 0.7328618063112078, "acc_norm_stderr,none": 0.010323440492612437}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5293172690763053, "acc_stderr,none": 0.010004830045543986}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4606425702811245, "acc_stderr,none": 0.009990976095711897}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516876}}
|
42 |
{"created_at": "2025-08-16T13:43:26.061693", "global_step": 86000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34897610921501704, "acc_stderr,none": 0.013928933461382501, "acc_norm,none": 0.37542662116040953, "acc_norm_stderr,none": 0.01415063143511173}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7175925925925926, "acc_stderr,none": 0.009237303403479332, "acc_norm,none": 0.6452020202020202, "acc_norm_stderr,none": 0.009817629113069696}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218167, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218167}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336084, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336084}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581946, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581946}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254276, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254276}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43218482374029077, "acc_stderr,none": 0.00494367338827628, "acc_norm,none": 0.5642302330213105, "acc_norm_stderr,none": 0.004948439229523913}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.01824525299924668, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018404640854853313, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2, "acc_stderr,none": 0.03223291856101518}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799085}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2281021897810219, "acc_stderr,none": 0.017971498448249964, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.733949945593036, "acc_stderr,none": 0.010310039263352831, "acc_norm,none": 0.7279651795429815, "acc_norm_stderr,none": 0.010382763786247374}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5341365461847389, "acc_stderr,none": 0.00999868806610266}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4686746987951807, "acc_stderr,none": 0.010002384719762112}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
|
43 |
{"created_at": "2025-08-16T15:40:11.581054", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35665529010238906, "acc_stderr,none": 0.013998056902620197, "acc_norm,none": 0.38139931740614336, "acc_norm_stderr,none": 0.014194389086685261}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7188552188552189, "acc_stderr,none": 0.009224735470287002, "acc_norm,none": 0.6519360269360269, "acc_norm_stderr,none": 0.009774627600259012}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632404, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632404}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979265, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979265}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168495, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168495}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884535, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884535}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43457478589922327, "acc_stderr,none": 0.004946879874422679, "acc_norm,none": 0.5674168492332204, "acc_norm_stderr,none": 0.004944215937021392}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.01855129402868047, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133514}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018670810682622625, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.0325398943310852}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.22580645161290322, "acc_stderr,none": 0.033692449539812465}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017880233753581148, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "piqa": {"alias": "piqa", "acc,none": 0.735582154515778, "acc_stderr,none": 0.010289787244767173, "acc_norm,none": 0.7312295973884657, "acc_norm_stderr,none": 0.010343392940090016}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5088353413654618, "acc_stderr,none": 0.010020508033762624}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4642570281124498, "acc_stderr,none": 0.00999643246851036}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
{"created_at": "2025-08-16T11:47:10.715607", "global_step": 84000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34897610921501704, "acc_stderr,none": 0.013928933461382501, "acc_norm,none": 0.38054607508532423, "acc_norm_stderr,none": 0.01418827771234982}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7117003367003367, "acc_stderr,none": 0.009294774252029623, "acc_norm,none": 0.6452020202020202, "acc_norm_stderr,none": 0.009817629113069694}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.014420323451642536, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.014420323451642536}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.013963598349030465, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.013963598349030465}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23222222222222222, "acc_stderr,none": 0.014082825936214571, "acc_norm,none": 0.23222222222222222, "acc_norm_stderr,none": 0.014082825936214571}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2611111111111111, "acc_stderr,none": 0.014649486385262122, "acc_norm,none": 0.2611111111111111, "acc_norm_stderr,none": 0.014649486385262122}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.01456789134238004, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.01456789134238004}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4296952798247361, "acc_stderr,none": 0.004940208641372078, "acc_norm,none": 0.5622385978888668, "acc_norm_stderr,none": 0.004950973231188741}, "include_base_44_chinese": {"acc,none": 0.24954128440366974, "acc_stderr,none": 0.018641749804857447, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.054549061214188996}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.01840716058598386, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2215568862275449, "acc_stderr,none": 0.03223309610157498}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.21935483870967742, "acc_stderr,none": 0.03334571892189038}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799084}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018163913425664667, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073528}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.3, "acc_stderr,none": 0.06546536707079771}, "piqa": {"alias": "piqa", "acc,none": 0.7279651795429815, "acc_stderr,none": 0.010382763786247378, "acc_norm,none": 0.7328618063112078, "acc_norm_stderr,none": 0.010323440492612437}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5293172690763053, "acc_stderr,none": 0.010004830045543986}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4606425702811245, "acc_stderr,none": 0.009990976095711897}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516876}}
|
42 |
{"created_at": "2025-08-16T13:43:26.061693", "global_step": 86000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34897610921501704, "acc_stderr,none": 0.013928933461382501, "acc_norm,none": 0.37542662116040953, "acc_norm_stderr,none": 0.01415063143511173}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7175925925925926, "acc_stderr,none": 0.009237303403479332, "acc_norm,none": 0.6452020202020202, "acc_norm_stderr,none": 0.009817629113069696}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218167, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218167}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336084, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336084}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.27111111111111114, "acc_stderr,none": 0.014826016446581946, "acc_norm,none": 0.27111111111111114, "acc_norm_stderr,none": 0.014826016446581946}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25555555555555554, "acc_stderr,none": 0.014547185072254276, "acc_norm,none": 0.25555555555555554, "acc_norm_stderr,none": 0.014547185072254276}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43218482374029077, "acc_stderr,none": 0.00494367338827628, "acc_norm,none": 0.5642302330213105, "acc_norm_stderr,none": 0.004948439229523913}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.01824525299924668, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295696}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921429}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018404640854853313, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.2, "acc_stderr,none": 0.03223291856101518}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799085}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_turkish": {"acc,none": 0.2281021897810219, "acc_stderr,none": 0.017971498448249964, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.20481927710843373, "acc_stderr,none": 0.03141784291663926}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2469879518072289, "acc_stderr,none": 0.03357351982064536}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.032400048255946876}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.26, "acc_stderr,none": 0.06266203485560373}, "piqa": {"alias": "piqa", "acc,none": 0.733949945593036, "acc_stderr,none": 0.010310039263352831, "acc_norm,none": 0.7279651795429815, "acc_norm_stderr,none": 0.010382763786247374}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5341365461847389, "acc_stderr,none": 0.00999868806610266}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4686746987951807, "acc_stderr,none": 0.010002384719762112}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
|
43 |
{"created_at": "2025-08-16T15:40:11.581054", "global_step": 88000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35665529010238906, "acc_stderr,none": 0.013998056902620197, "acc_norm,none": 0.38139931740614336, "acc_norm_stderr,none": 0.014194389086685261}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7188552188552189, "acc_stderr,none": 0.009224735470287002, "acc_norm,none": 0.6519360269360269, "acc_norm_stderr,none": 0.009774627600259012}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24444444444444444, "acc_stderr,none": 0.014333209259632404, "acc_norm,none": 0.24444444444444444, "acc_norm_stderr,none": 0.014333209259632404}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23, "acc_stderr,none": 0.014035549969945594, "acc_norm,none": 0.23, "acc_norm_stderr,none": 0.014035549969945594}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.01424401987979265, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.01424401987979265}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2588888888888889, "acc_stderr,none": 0.014608933836168495, "acc_norm,none": 0.2588888888888889, "acc_norm_stderr,none": 0.014608933836168495}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884535, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884535}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43457478589922327, "acc_stderr,none": 0.004946879874422679, "acc_norm,none": 0.5674168492332204, "acc_norm_stderr,none": 0.004944215937021392}, "include_base_44_chinese": {"acc,none": 0.24770642201834864, "acc_stderr,none": 0.01855129402868047, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.2535211267605634, "acc_stderr,none": 0.05199562688295697}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.046143776682648914}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133514}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.25547445255474455, "acc_stderr,none": 0.018670810682622625, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2857142857142857, "acc_stderr,none": 0.07747516350666293}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2275449101796407, "acc_stderr,none": 0.0325398943310852}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.22580645161290322, "acc_stderr,none": 0.033692449539812465}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2994011976047904, "acc_stderr,none": 0.03554736535384932}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.22445255474452555, "acc_stderr,none": 0.017880233753581148, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594688}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.057142857142857134}, "piqa": {"alias": "piqa", "acc,none": 0.735582154515778, "acc_stderr,none": 0.010289787244767173, "acc_norm,none": 0.7312295973884657, "acc_norm_stderr,none": 0.010343392940090016}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5088353413654618, "acc_stderr,none": 0.010020508033762624}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4642570281124498, "acc_stderr,none": 0.00999643246851036}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.336144578313253, "acc_stderr,none": 0.009468634669293527}}
|
44 |
+
{"created_at": "2025-08-16T17:36:25.992970", "global_step": 90000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.35409556313993173, "acc_stderr,none": 0.01397545412275656, "acc_norm,none": 0.38054607508532423, "acc_norm_stderr,none": 0.014188277712349822}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7188552188552189, "acc_stderr,none": 0.009224735470287002, "acc_norm,none": 0.656986531986532, "acc_norm_stderr,none": 0.00974096566648923}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.2411111111111111, "acc_stderr,none": 0.01426651388657891, "acc_norm,none": 0.2411111111111111, "acc_norm_stderr,none": 0.01426651388657891}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22666666666666666, "acc_stderr,none": 0.01396359834903047, "acc_norm,none": 0.22666666666666666, "acc_norm_stderr,none": 0.01396359834903047}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.014011705158884526, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.014011705158884526}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217888, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217888}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.013890384297198695, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.013890384297198695}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4358693487353117, "acc_stderr,none": 0.004948567856373872, "acc_norm,none": 0.5673172674765983, "acc_norm_stderr,none": 0.004944351065545866}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.018447835523733126, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278443}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.26436781609195403, "acc_stderr,none": 0.04755382188278444}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.18309859154929578, "acc_stderr,none": 0.04622514734921427}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.23722627737226276, "acc_stderr,none": 0.018145295885040397, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.14285714285714285, "acc_stderr,none": 0.06001200360120041}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.033675118801687026}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1870967741935484, "acc_stderr,none": 0.03142622480848987}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2754491017964072, "acc_stderr,none": 0.034673771737174536}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.4166666666666667, "acc_stderr,none": 0.14864709750264077}, "include_base_44_turkish": {"acc,none": 0.23175182481751824, "acc_stderr,none": 0.01804802090609801, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073529}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.14, "acc_stderr,none": 0.04956957592256417}, "piqa": {"alias": "piqa", "acc,none": 0.73449401523395, "acc_stderr,none": 0.010303308653024427, "acc_norm,none": 0.73449401523395, "acc_norm_stderr,none": 0.010303308653024427}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5253012048192771, "acc_stderr,none": 0.010009233363499395}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46265060240963857, "acc_stderr,none": 0.00999407262056142}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3409638554216867, "acc_stderr,none": 0.009501591178361541}}
|
45 |
+
{"created_at": "2025-08-16T19:33:37.185020", "global_step": 92000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3532423208191126, "acc_stderr,none": 0.013967822714840055, "acc_norm,none": 0.3916382252559727, "acc_norm_stderr,none": 0.014264122124938213}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7234848484848485, "acc_stderr,none": 0.00917788010146828, "acc_norm,none": 0.6603535353535354, "acc_norm_stderr,none": 0.009717845628687464}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24333333333333335, "acc_stderr,none": 0.014311107963683047, "acc_norm,none": 0.24333333333333335, "acc_norm_stderr,none": 0.014311107963683047}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22111111111111112, "acc_stderr,none": 0.013840863699859532, "acc_norm,none": 0.22111111111111112, "acc_norm_stderr,none": 0.013840863699859532}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132625, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132625}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.26666666666666666, "acc_stderr,none": 0.014748750651470884, "acc_norm,none": 0.26666666666666666, "acc_norm_stderr,none": 0.014748750651470884}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.22333333333333333, "acc_stderr,none": 0.0138903842971987, "acc_norm,none": 0.22333333333333333, "acc_norm_stderr,none": 0.0138903842971987}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43487353116908983, "acc_stderr,none": 0.004947272454226203, "acc_norm,none": 0.5706034654451304, "acc_norm_stderr,none": 0.004939784311448983}, "include_base_44_chinese": {"acc,none": 0.23669724770642203, "acc_stderr,none": 0.018273305976599714, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.2676056338028169, "acc_stderr,none": 0.05291406220869697}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.04479290819909663}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.2427007299270073, "acc_stderr,none": 0.018319707627104494, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2571428571428571, "acc_stderr,none": 0.07495496847387484}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2874251497005988, "acc_stderr,none": 0.03512558647799083}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.18064516129032257, "acc_stderr,none": 0.031001936155783012}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.24550898203592814, "acc_stderr,none": 0.03340463153945589}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24635036496350365, "acc_stderr,none": 0.018435164454892936, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.27710843373493976, "acc_stderr,none": 0.034843315926805875}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21084337349397592, "acc_stderr,none": 0.03175554786629919}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.28, "acc_stderr,none": 0.06414269805898186}, "piqa": {"alias": "piqa", "acc,none": 0.7421109902067464, "acc_stderr,none": 0.010206956662056274, "acc_norm,none": 0.735582154515778, "acc_norm_stderr,none": 0.010289787244767165}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5273092369477912, "acc_stderr,none": 0.010007112889731992}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4674698795180723, "acc_stderr,none": 0.010000839483876022}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3369477911646586, "acc_stderr,none": 0.009474203778757708}}
|
46 |
+
{"created_at": "2025-08-16T21:30:33.770961", "global_step": 94000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3515358361774744, "acc_stderr,none": 0.013952413699600942, "acc_norm,none": 0.39078498293515357, "acc_norm_stderr,none": 0.014258563880513778}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7213804713804713, "acc_stderr,none": 0.009199329195026352, "acc_norm,none": 0.6536195286195287, "acc_norm_stderr,none": 0.00976354207569573}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2288888888888889, "acc_stderr,none": 0.01401170515888452, "acc_norm,none": 0.2288888888888889, "acc_norm_stderr,none": 0.01401170515888452}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.24, "acc_stderr,none": 0.014244019879792647, "acc_norm,none": 0.24, "acc_norm_stderr,none": 0.014244019879792647}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2733333333333333, "acc_stderr,none": 0.01486394440941748, "acc_norm,none": 0.2733333333333333, "acc_norm_stderr,none": 0.01486394440941748}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.24888888888888888, "acc_stderr,none": 0.01442032345164254, "acc_norm,none": 0.24888888888888888, "acc_norm_stderr,none": 0.01442032345164254}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4371639115714001, "acc_stderr,none": 0.004950221546187575, "acc_norm,none": 0.5717984465245967, "acc_norm_stderr,none": 0.004938068627349494}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.018339752280638798, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.352112676056338, "acc_stderr,none": 0.05708756925195619}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.044792908199096614}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.21839080459770116, "acc_stderr,none": 0.044551545932103705}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115032}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.049938324531150324}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.1875, "acc_stderr,none": 0.10077822185373188}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018311525678026724, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25149700598802394, "acc_stderr,none": 0.033675118801687026}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1935483870967742, "acc_stderr,none": 0.031836372336763036}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.03443623453899478}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.2281021897810219, "acc_stderr,none": 0.017983907749376898, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594687}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.033293941190735296}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.2, "acc_stderr,none": 0.05714285714285715}, "piqa": {"alias": "piqa", "acc,none": 0.7383025027203483, "acc_stderr,none": 0.010255630772708229, "acc_norm,none": 0.73449401523395, "acc_norm_stderr,none": 0.010303308653024427}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5240963855421686, "acc_stderr,none": 0.01001042775321067}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.46546184738955826, "acc_stderr,none": 0.009998133936261186}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3385542168674699, "acc_stderr,none": 0.009485250208516876}}
|
47 |
+
{"created_at": "2025-08-16T23:27:11.873127", "global_step": 96000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3515358361774744, "acc_stderr,none": 0.013952413699600938, "acc_norm,none": 0.3890784982935154, "acc_norm_stderr,none": 0.014247309976045607}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7234848484848485, "acc_stderr,none": 0.00917788010146828, "acc_norm,none": 0.6590909090909091, "acc_norm_stderr,none": 0.00972657959342402}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.014484319811433903, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.014484319811433903}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.2311111111111111, "acc_stderr,none": 0.014059256663218186, "acc_norm,none": 0.2311111111111111, "acc_norm_stderr,none": 0.014059256663218186}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391954, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391954}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.2633333333333333, "acc_stderr,none": 0.014689553047342518, "acc_norm,none": 0.2633333333333333, "acc_norm_stderr,none": 0.014689553047342518}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2511111111111111, "acc_stderr,none": 0.01446311410517081, "acc_norm,none": 0.2511111111111111, "acc_norm_stderr,none": 0.01446311410517081}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43706432981477794, "acc_stderr,none": 0.004950095555964668, "acc_norm,none": 0.5733917546305517, "acc_norm_stderr,none": 0.004935735300348873}, "include_base_44_chinese": {"acc,none": 0.24403669724770644, "acc_stderr,none": 0.01838667406905434, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.30985915492957744, "acc_stderr,none": 0.055271596817883316}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.15492957746478872, "acc_stderr,none": 0.043247857666407805}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.22535211267605634, "acc_stderr,none": 0.04993832453115031}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018299723638499344, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.25748502994011974, "acc_stderr,none": 0.033937086485697064}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1870967741935484, "acc_stderr,none": 0.03142622480848987}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.2518248175182482, "acc_stderr,none": 0.018595403113466222, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.03410646614071854}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.2289156626506024, "acc_stderr,none": 0.03270745277352477}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.2710843373493976, "acc_stderr,none": 0.03460579907553027}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "piqa": {"alias": "piqa", "acc,none": 0.735038084874864, "acc_stderr,none": 0.010296557993316066, "acc_norm,none": 0.7366702937976061, "acc_norm_stderr,none": 0.010276185322196766}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5244979919678715, "acc_stderr,none": 0.010010036112667887}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.4634538152610442, "acc_stderr,none": 0.009995265580368909}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3393574297188755, "acc_stderr,none": 0.009490727635646755}}
|
48 |
+
{"created_at": "2025-08-17T02:24:36.863580", "global_step": 98000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.34897610921501704, "acc_stderr,none": 0.013928933461382501, "acc_norm,none": 0.38822525597269625, "acc_norm_stderr,none": 0.014241614207414042}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7205387205387206, "acc_stderr,none": 0.009207838142597239, "acc_norm,none": 0.6611952861952862, "acc_norm_stderr,none": 0.009711980224301649}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.24555555555555555, "acc_stderr,none": 0.014355180865342966, "acc_norm,none": 0.24555555555555555, "acc_norm_stderr,none": 0.014355180865342966}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132642, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132642}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23666666666666666, "acc_stderr,none": 0.014175742474391946, "acc_norm,none": 0.23666666666666666, "acc_norm_stderr,none": 0.014175742474391946}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.25666666666666665, "acc_stderr,none": 0.014567891342380034, "acc_norm,none": 0.25666666666666665, "acc_norm_stderr,none": 0.014567891342380034}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.2477777777777778, "acc_stderr,none": 0.014398737377336079, "acc_norm,none": 0.2477777777777778, "acc_norm_stderr,none": 0.014398737377336079}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.4367655845449114, "acc_stderr,none": 0.00494971636889049, "acc_norm,none": 0.5742879904401513, "acc_norm_stderr,none": 0.004934399802594907}, "include_base_44_chinese": {"acc,none": 0.23486238532110093, "acc_stderr,none": 0.018158295323249782, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.22988505747126436, "acc_stderr,none": 0.045371581852507746}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.323943661971831, "acc_stderr,none": 0.05593416612923642}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.16901408450704225, "acc_stderr,none": 0.044792908199096614}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.04368097459950702}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.048790163593489484}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.2354014598540146, "acc_stderr,none": 0.018164822337580114, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.2, "acc_stderr,none": 0.06859943405700354}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.23952095808383234, "acc_stderr,none": 0.03312541599851861}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1935483870967742, "acc_stderr,none": 0.031836372336763036}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.03443623453899478}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24452554744525548, "acc_stderr,none": 0.018416323167035547, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.25301204819277107, "acc_stderr,none": 0.03384429155233137}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.22289156626506024, "acc_stderr,none": 0.03240004825594687}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.25903614457831325, "acc_stderr,none": 0.034106466140718564}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.061011875725893235}, "piqa": {"alias": "piqa", "acc,none": 0.7404787812840044, "acc_stderr,none": 0.010227939888173922, "acc_norm,none": 0.7366702937976061, "acc_norm_stderr,none": 0.010276185322196766}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5200803212851406, "acc_stderr,none": 0.010013987419234089}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.45863453815261046, "acc_stderr,none": 0.009987716412406571}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3453815261044177, "acc_stderr,none": 0.009530841175865182}}
|
49 |
+
{"created_at": "2025-08-17T03:20:02.530763", "global_step": 100000, "arc_challenge": {"alias": "arc_challenge", "acc,none": 0.3515358361774744, "acc_stderr,none": 0.013952413699600938, "acc_norm,none": 0.3890784982935154, "acc_norm_stderr,none": 0.014247309976045609}, "arc_easy": {"alias": "arc_easy", "acc,none": 0.7213804713804713, "acc_stderr,none": 0.009199329195026348, "acc_norm,none": 0.6565656565656566, "acc_norm_stderr,none": 0.009743817368960019}, "belebele_eng_Latn": {"alias": "belebele_eng_Latn", "acc,none": 0.25, "acc_stderr,none": 0.014441782171967503, "acc_norm,none": 0.25, "acc_norm_stderr,none": 0.014441782171967503}, "belebele_ita_Latn": {"alias": "belebele_ita_Latn", "acc,none": 0.22555555555555556, "acc_stderr,none": 0.013939334910458128, "acc_norm,none": 0.22555555555555556, "acc_norm_stderr,none": 0.013939334910458128}, "belebele_pes_Arab": {"alias": "belebele_pes_Arab", "acc,none": 0.23555555555555555, "acc_stderr,none": 0.014152716079132637, "acc_norm,none": 0.23555555555555555, "acc_norm_stderr,none": 0.014152716079132637}, "belebele_tur_Latn": {"alias": "belebele_tur_Latn", "acc,none": 0.26222222222222225, "acc_stderr,none": 0.014669580202217886, "acc_norm,none": 0.26222222222222225, "acc_norm_stderr,none": 0.014669580202217886}, "belebele_zho_Hans": {"alias": "belebele_zho_Hans", "acc,none": 0.25222222222222224, "acc_stderr,none": 0.0144843198114339, "acc_norm,none": 0.25222222222222224, "acc_norm_stderr,none": 0.0144843198114339}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.43776140211113324, "acc_stderr,none": 0.004950973231188733, "acc_norm,none": 0.5734913363871739, "acc_norm_stderr,none": 0.004935587729948867}, "include_base_44_chinese": {"acc,none": 0.24220183486238533, "acc_stderr,none": 0.018312636514962906, "alias": "include_base_44_chinese"}, "include_base_44_chinese_few_shot_og_applied_science": {"alias": " - include_base_44_chinese_few_shot_og_applied_science", "acc,none": 0.29577464788732394, "acc_stderr,none": 0.05454906121418899}, "include_base_44_chinese_few_shot_og_arts_humanities": {"alias": " - include_base_44_chinese_few_shot_og_arts_humanities", "acc,none": 0.25287356321839083, "acc_stderr,none": 0.04687049503854671}, "include_base_44_chinese_few_shot_og_business_commerce": {"alias": " - include_base_44_chinese_few_shot_og_business_commerce", "acc,none": 0.3380281690140845, "acc_stderr,none": 0.05653887739133514}, "include_base_44_chinese_few_shot_og_driving_license": {"alias": " - include_base_44_chinese_few_shot_og_driving_license", "acc,none": 0.14084507042253522, "acc_stderr,none": 0.04157742116654289}, "include_base_44_chinese_few_shot_og_health_oriented_education": {"alias": " - include_base_44_chinese_few_shot_og_health_oriented_education", "acc,none": 0.2413793103448276, "acc_stderr,none": 0.04614377668264891}, "include_base_44_chinese_few_shot_og_professional_certification": {"alias": " - include_base_44_chinese_few_shot_og_professional_certification", "acc,none": 0.2112676056338028, "acc_stderr,none": 0.04879016359348949}, "include_base_44_chinese_few_shot_og_social_science": {"alias": " - include_base_44_chinese_few_shot_og_social_science", "acc,none": 0.23943661971830985, "acc_stderr,none": 0.05100514327793383}, "include_base_44_chinese_few_shot_og_stem": {"alias": " - include_base_44_chinese_few_shot_og_stem", "acc,none": 0.125, "acc_stderr,none": 0.08539125638299665}, "include_base_44_italian": {"acc,none": 0.25, "acc_stderr,none": 0.018524972627127996, "alias": "include_base_44_italian"}, "include_base_44_italian_few_shot_og_applied_science": {"alias": " - include_base_44_italian_few_shot_og_applied_science", "acc,none": 0.22857142857142856, "acc_stderr,none": 0.0720144043214405}, "include_base_44_italian_few_shot_og_arts_humanities": {"alias": " - include_base_44_italian_few_shot_og_arts_humanities", "acc,none": 0.2694610778443114, "acc_stderr,none": 0.034436234538994775}, "include_base_44_italian_few_shot_og_health_oriented_education": {"alias": " - include_base_44_italian_few_shot_og_health_oriented_education", "acc,none": 0.25, "acc_stderr,none": 0.1305582419667734}, "include_base_44_italian_few_shot_og_professional_certification": {"alias": " - include_base_44_italian_few_shot_og_professional_certification", "acc,none": 0.1935483870967742, "acc_stderr,none": 0.031836372336763036}, "include_base_44_italian_few_shot_og_social_science": {"alias": " - include_base_44_italian_few_shot_og_social_science", "acc,none": 0.281437125748503, "acc_stderr,none": 0.034903504674283575}, "include_base_44_italian_few_shot_og_stem": {"alias": " - include_base_44_italian_few_shot_og_stem", "acc,none": 0.3333333333333333, "acc_stderr,none": 0.14213381090374033}, "include_base_44_turkish": {"acc,none": 0.24087591240875914, "acc_stderr,none": 0.018316200346274143, "alias": "include_base_44_turkish"}, "include_base_44_turkish_few_shot_og_arts_humanities": {"alias": " - include_base_44_turkish_few_shot_og_arts_humanities", "acc,none": 0.24096385542168675, "acc_stderr,none": 0.03329394119073529}, "include_base_44_turkish_few_shot_og_business_commerce": {"alias": " - include_base_44_turkish_few_shot_og_business_commerce", "acc,none": 0.21686746987951808, "acc_stderr,none": 0.03208284450356365}, "include_base_44_turkish_few_shot_og_social_science": {"alias": " - include_base_44_turkish_few_shot_og_social_science", "acc,none": 0.26506024096385544, "acc_stderr,none": 0.03436024037944967}, "include_base_44_turkish_few_shot_og_stem": {"alias": " - include_base_44_turkish_few_shot_og_stem", "acc,none": 0.24, "acc_stderr,none": 0.06101187572589323}, "piqa": {"alias": "piqa", "acc,none": 0.7393906420021763, "acc_stderr,none": 0.010241826155811621, "acc_norm,none": 0.735582154515778, "acc_norm_stderr,none": 0.010289787244767163}, "xnli_en": {"alias": "xnli_en", "acc,none": 0.5224899598393574, "acc_stderr,none": 0.010011929439394}, "xnli_tr": {"alias": "xnli_tr", "acc,none": 0.463855421686747, "acc_stderr,none": 0.009995852282822383}, "xnli_zh": {"alias": "xnli_zh", "acc,none": 0.3417670682730924, "acc_stderr,none": 0.00950697739828762}}
|