{"created_at": "2025-08-14T15:22:23.158961", "global_step": 22000, "commonsense_qa": {"alias": "commonsense_qa", "acc,none": 0.1941031941031941, "acc_stderr,none": 0.01132338158892044}, "hellaswag": {"alias": "hellaswag", "acc,none": 0.40977892850029873, "acc_stderr,none": 0.004907877144720008, "acc_norm,none": 0.525094602668791, "acc_norm_stderr,none": 0.004983492928102842}, "mmlu": {"acc,none": 0.240777666999003, "acc_stderr,none": 0.0036019073404932747, "alias": "mmlu"}, "mmlu_humanities": {"acc,none": 0.23889479277364506, "acc_stderr,none": 0.0062195139225603505, "alias": " - humanities"}, "mmlu_formal_logic": {"alias": " - formal_logic", "acc,none": 0.20634920634920634, "acc_stderr,none": 0.03619604524124249}, "mmlu_high_school_european_history": {"alias": " - high_school_european_history", "acc,none": 0.22424242424242424, "acc_stderr,none": 0.03256866661681102}, "mmlu_high_school_us_history": {"alias": " - high_school_us_history", "acc,none": 0.24019607843137256, "acc_stderr,none": 0.02998373305591362}, "mmlu_high_school_world_history": {"alias": " - high_school_world_history", "acc,none": 0.270042194092827, "acc_stderr,none": 0.028900721906293426}, "mmlu_international_law": {"alias": " - international_law", "acc,none": 0.21487603305785125, "acc_stderr,none": 0.037494924487096966}, "mmlu_jurisprudence": {"alias": " - jurisprudence", "acc,none": 0.26851851851851855, "acc_stderr,none": 0.04284467968052191}, "mmlu_logical_fallacies": {"alias": " - logical_fallacies", "acc,none": 0.22699386503067484, "acc_stderr,none": 0.03291099578615768}, "mmlu_moral_disputes": {"alias": " - moral_disputes", "acc,none": 0.23121387283236994, "acc_stderr,none": 0.022698657167855713}, "mmlu_moral_scenarios": {"alias": " - moral_scenarios", "acc,none": 0.2424581005586592, "acc_stderr,none": 0.01433352205921789}, "mmlu_philosophy": {"alias": " - philosophy", "acc,none": 0.19292604501607716, "acc_stderr,none": 0.022411516780911363}, "mmlu_prehistory": {"alias": " - prehistory", 
"acc,none": 0.2654320987654321, "acc_stderr,none": 0.024569223600460845}, "mmlu_professional_law": {"alias": " - professional_law", "acc,none": 0.24315514993481094, "acc_stderr,none": 0.010956556654417351}, "mmlu_world_religions": {"alias": " - world_religions", "acc,none": 0.23391812865497075, "acc_stderr,none": 0.03246721765117827}, "mmlu_other": {"acc,none": 0.2632764724814934, "acc_stderr,none": 0.007870693663494535, "alias": " - other"}, "mmlu_business_ethics": {"alias": " - business_ethics", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_clinical_knowledge": {"alias": " - clinical_knowledge", "acc,none": 0.21509433962264152, "acc_stderr,none": 0.025288394502891366}, "mmlu_college_medicine": {"alias": " - college_medicine", "acc,none": 0.23121387283236994, "acc_stderr,none": 0.032147373020294696}, "mmlu_global_facts": {"alias": " - global_facts", "acc,none": 0.34, "acc_stderr,none": 0.04760952285695235}, "mmlu_human_aging": {"alias": " - human_aging", "acc,none": 0.33183856502242154, "acc_stderr,none": 0.03160295143776679}, "mmlu_management": {"alias": " - management", "acc,none": 0.2524271844660194, "acc_stderr,none": 0.04301250399690877}, "mmlu_marketing": {"alias": " - marketing", "acc,none": 0.2777777777777778, "acc_stderr,none": 0.029343114798094472}, "mmlu_medical_genetics": {"alias": " - medical_genetics", "acc,none": 0.33, "acc_stderr,none": 0.04725815626252605}, "mmlu_miscellaneous": {"alias": " - miscellaneous", "acc,none": 0.2937420178799489, "acc_stderr,none": 0.016287759388491675}, "mmlu_nutrition": {"alias": " - nutrition", "acc,none": 0.22875816993464052, "acc_stderr,none": 0.024051029739912248}, "mmlu_professional_accounting": {"alias": " - professional_accounting", "acc,none": 0.2624113475177305, "acc_stderr,none": 0.026244920349843007}, "mmlu_professional_medicine": {"alias": " - professional_medicine", "acc,none": 0.16176470588235295, "acc_stderr,none": 0.02236867256288675}, "mmlu_virology": {"alias": " - virology", 
"acc,none": 0.24096385542168675, "acc_stderr,none": 0.0332939411907353}, "mmlu_social_sciences": {"acc,none": 0.2265193370165746, "acc_stderr,none": 0.007550999736568054, "alias": " - social sciences"}, "mmlu_econometrics": {"alias": " - econometrics", "acc,none": 0.24561403508771928, "acc_stderr,none": 0.04049339297748142}, "mmlu_high_school_geography": {"alias": " - high_school_geography", "acc,none": 0.21717171717171718, "acc_stderr,none": 0.029376616484945644}, "mmlu_high_school_government_and_politics": {"alias": " - high_school_government_and_politics", "acc,none": 0.21761658031088082, "acc_stderr,none": 0.029778663037752954}, "mmlu_high_school_macroeconomics": {"alias": " - high_school_macroeconomics", "acc,none": 0.2128205128205128, "acc_stderr,none": 0.020752423722128006}, "mmlu_high_school_microeconomics": {"alias": " - high_school_microeconomics", "acc,none": 0.19327731092436976, "acc_stderr,none": 0.025649470265889183}, "mmlu_high_school_psychology": {"alias": " - high_school_psychology", "acc,none": 0.23486238532110093, "acc_stderr,none": 0.018175110510343595}, "mmlu_human_sexuality": {"alias": " - human_sexuality", "acc,none": 0.25190839694656486, "acc_stderr,none": 0.03807387116306086}, "mmlu_professional_psychology": {"alias": " - professional_psychology", "acc,none": 0.23202614379084968, "acc_stderr,none": 0.017077373377857002}, "mmlu_public_relations": {"alias": " - public_relations", "acc,none": 0.2545454545454545, "acc_stderr,none": 0.04172343038705383}, "mmlu_security_studies": {"alias": " - security_studies", "acc,none": 0.19183673469387755, "acc_stderr,none": 0.0252069631542254}, "mmlu_sociology": {"alias": " - sociology", "acc,none": 0.2736318407960199, "acc_stderr,none": 0.031524391865554016}, "mmlu_us_foreign_policy": {"alias": " - us_foreign_policy", "acc,none": 0.22, "acc_stderr,none": 0.0416333199893227}, "mmlu_stem": {"acc,none": 0.2353314303837615, "acc_stderr,none": 0.007531819202648028, "alias": " - stem"}, "mmlu_abstract_algebra": 
{"alias": " - abstract_algebra", "acc,none": 0.26, "acc_stderr,none": 0.04408440022768078}, "mmlu_anatomy": {"alias": " - anatomy", "acc,none": 0.21481481481481482, "acc_stderr,none": 0.035478541985608236}, "mmlu_astronomy": {"alias": " - astronomy", "acc,none": 0.23026315789473684, "acc_stderr,none": 0.03426059424403165}, "mmlu_college_biology": {"alias": " - college_biology", "acc,none": 0.2152777777777778, "acc_stderr,none": 0.03437079344106135}, "mmlu_college_chemistry": {"alias": " - college_chemistry", "acc,none": 0.31, "acc_stderr,none": 0.04648231987117316}, "mmlu_college_computer_science": {"alias": " - college_computer_science", "acc,none": 0.14, "acc_stderr,none": 0.0348735088019777}, "mmlu_college_mathematics": {"alias": " - college_mathematics", "acc,none": 0.28, "acc_stderr,none": 0.04512608598542128}, "mmlu_college_physics": {"alias": " - college_physics", "acc,none": 0.19607843137254902, "acc_stderr,none": 0.03950581861179962}, "mmlu_computer_security": {"alias": " - computer_security", "acc,none": 0.2, "acc_stderr,none": 0.04020151261036845}, "mmlu_conceptual_physics": {"alias": " - conceptual_physics", "acc,none": 0.2978723404255319, "acc_stderr,none": 0.02989614568209546}, "mmlu_electrical_engineering": {"alias": " - electrical_engineering", "acc,none": 0.2896551724137931, "acc_stderr,none": 0.03780019230438015}, "mmlu_elementary_mathematics": {"alias": " - elementary_mathematics", "acc,none": 0.24867724867724866, "acc_stderr,none": 0.022261817692400192}, "mmlu_high_school_biology": {"alias": " - high_school_biology", "acc,none": 0.2161290322580645, "acc_stderr,none": 0.02341529343356852}, "mmlu_high_school_chemistry": {"alias": " - high_school_chemistry", "acc,none": 0.20689655172413793, "acc_stderr,none": 0.02850137816789395}, "mmlu_high_school_computer_science": {"alias": " - high_school_computer_science", "acc,none": 0.27, "acc_stderr,none": 0.04461960433384741}, "mmlu_high_school_mathematics": {"alias": " - high_school_mathematics", 
"acc,none": 0.2740740740740741, "acc_stderr,none": 0.027195934804085622}, "mmlu_high_school_physics": {"alias": " - high_school_physics", "acc,none": 0.23178807947019867, "acc_stderr,none": 0.03445406271987054}, "mmlu_high_school_statistics": {"alias": " - high_school_statistics", "acc,none": 0.12962962962962962, "acc_stderr,none": 0.02290788315128859}, "mmlu_machine_learning": {"alias": " - machine_learning", "acc,none": 0.25892857142857145, "acc_stderr,none": 0.041577515398656284}, "sciq": {"alias": "sciq", "acc,none": 0.855, "acc_stderr,none": 0.011139977517890132, "acc_norm,none": 0.795, "acc_norm_stderr,none": 0.012772554096113112}}