| { | |
| "best_metric": 0.8359799918026352, | |
| "best_model_checkpoint": "result/simcse-celectra-amlp-dmlp-bs128-lr2e-6-mask0.40-elew0.01-roberta-base", | |
| "epoch": 1.0, | |
| "global_step": 7813, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "electra_acc": 0.2448, | |
| "electra_fix_acc": 0.006, | |
| "electra_rep_acc": 0.9912, | |
| "epoch": 0.0, | |
| "learning_rate": 1.9997440163829513e-06, | |
| "loss": 8.6888, | |
| "neg_sim": 0.2041, | |
| "pos_sim": 0.3651, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_avg_sts": 0.7368351609735859, | |
| "eval_sickr_spearman": 0.6964049570574558, | |
| "eval_stsb_spearman": 0.7772653648897162, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_avg_sts": 0.7530561409181189, | |
| "eval_sickr_spearman": 0.707297882521504, | |
| "eval_stsb_spearman": 0.7988143993147339, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_avg_sts": 0.7605110871922394, | |
| "eval_sickr_spearman": 0.7124740982612865, | |
| "eval_stsb_spearman": 0.8085480761231921, | |
| "step": 375 | |
| }, | |
| { | |
| "electra_acc": 0.7243, | |
| "electra_fix_acc": 0.8981, | |
| "electra_rep_acc": 0.2064, | |
| "epoch": 0.06, | |
| "learning_rate": 1.8720081914757456e-06, | |
| "loss": 0.6062, | |
| "neg_sim": 0.0055, | |
| "pos_sim": 0.6593, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_avg_sts": 0.763939631028596, | |
| "eval_sickr_spearman": 0.7155249377624325, | |
| "eval_stsb_spearman": 0.8123543242947595, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_avg_sts": 0.7670128200581756, | |
| "eval_sickr_spearman": 0.7176028112417467, | |
| "eval_stsb_spearman": 0.8164228288746044, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_avg_sts": 0.7683647856842559, | |
| "eval_sickr_spearman": 0.7182528161375087, | |
| "eval_stsb_spearman": 0.8184767552310033, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_avg_sts": 0.7703405632893722, | |
| "eval_sickr_spearman": 0.7201300637061159, | |
| "eval_stsb_spearman": 0.8205510628726285, | |
| "step": 875 | |
| }, | |
| { | |
| "electra_acc": 0.7954, | |
| "electra_fix_acc": 0.9411, | |
| "electra_rep_acc": 0.3621, | |
| "epoch": 0.13, | |
| "learning_rate": 1.744016382951491e-06, | |
| "loss": 0.0043, | |
| "neg_sim": -0.0066, | |
| "pos_sim": 0.7176, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_avg_sts": 0.7716371834988085, | |
| "eval_sickr_spearman": 0.7207027865596639, | |
| "eval_stsb_spearman": 0.8225715804379532, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_avg_sts": 0.7719569589534554, | |
| "eval_sickr_spearman": 0.7195236710504597, | |
| "eval_stsb_spearman": 0.8243902468564511, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_avg_sts": 0.7725905596083784, | |
| "eval_sickr_spearman": 0.7204897205936844, | |
| "eval_stsb_spearman": 0.8246913986230725, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_avg_sts": 0.7740128875580021, | |
| "eval_sickr_spearman": 0.7209413090094056, | |
| "eval_stsb_spearman": 0.8270844661065986, | |
| "step": 1375 | |
| }, | |
| { | |
| "electra_acc": 0.806, | |
| "electra_fix_acc": 0.9446, | |
| "electra_rep_acc": 0.3921, | |
| "epoch": 0.19, | |
| "learning_rate": 1.6160245744272365e-06, | |
| "loss": 0.0039, | |
| "neg_sim": -0.0067, | |
| "pos_sim": 0.7288, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_avg_sts": 0.7747104419219604, | |
| "eval_sickr_spearman": 0.721527768757965, | |
| "eval_stsb_spearman": 0.8278931150859556, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_avg_sts": 0.7750669451240817, | |
| "eval_sickr_spearman": 0.722242135329654, | |
| "eval_stsb_spearman": 0.8278917549185095, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_avg_sts": 0.7751917311744843, | |
| "eval_sickr_spearman": 0.7214598527805315, | |
| "eval_stsb_spearman": 0.8289236095684369, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_avg_sts": 0.775110379872699, | |
| "eval_sickr_spearman": 0.7211095139266419, | |
| "eval_stsb_spearman": 0.8291112458187563, | |
| "step": 1875 | |
| }, | |
| { | |
| "electra_acc": 0.8111, | |
| "electra_fix_acc": 0.9455, | |
| "electra_rep_acc": 0.4077, | |
| "epoch": 0.26, | |
| "learning_rate": 1.4880327659029822e-06, | |
| "loss": 0.0037, | |
| "neg_sim": -0.0067, | |
| "pos_sim": 0.7361, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_avg_sts": 0.7752389174925427, | |
| "eval_sickr_spearman": 0.7207011535022151, | |
| "eval_stsb_spearman": 0.8297766814828704, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_avg_sts": 0.7759291537103485, | |
| "eval_sickr_spearman": 0.7218097593545081, | |
| "eval_stsb_spearman": 0.8300485480661889, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_avg_sts": 0.7759100192724719, | |
| "eval_sickr_spearman": 0.7214468843831432, | |
| "eval_stsb_spearman": 0.8303731541618006, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_avg_sts": 0.77580052054564, | |
| "eval_sickr_spearman": 0.7208077825474077, | |
| "eval_stsb_spearman": 0.8307932585438723, | |
| "step": 2375 | |
| }, | |
| { | |
| "electra_acc": 0.8137, | |
| "electra_fix_acc": 0.9463, | |
| "electra_rep_acc": 0.4184, | |
| "epoch": 0.32, | |
| "learning_rate": 1.3600409573787276e-06, | |
| "loss": 0.0036, | |
| "neg_sim": -0.0067, | |
| "pos_sim": 0.7422, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_avg_sts": 0.7764057696201115, | |
| "eval_sickr_spearman": 0.7207564372999703, | |
| "eval_stsb_spearman": 0.8320551019402528, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_avg_sts": 0.7771167451325478, | |
| "eval_sickr_spearman": 0.7216867036726237, | |
| "eval_stsb_spearman": 0.8325467865924719, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_avg_sts": 0.777409082133846, | |
| "eval_sickr_spearman": 0.7218441496231378, | |
| "eval_stsb_spearman": 0.8329740146445542, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_avg_sts": 0.7776325118235485, | |
| "eval_sickr_spearman": 0.722264277667417, | |
| "eval_stsb_spearman": 0.8330007459796801, | |
| "step": 2875 | |
| }, | |
| { | |
| "electra_acc": 0.8163, | |
| "electra_fix_acc": 0.9464, | |
| "electra_rep_acc": 0.4284, | |
| "epoch": 0.38, | |
| "learning_rate": 1.2320491488544733e-06, | |
| "loss": 0.0034, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7471, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_avg_sts": 0.7777138423021386, | |
| "eval_sickr_spearman": 0.7227461256770441, | |
| "eval_stsb_spearman": 0.832681558927233, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_avg_sts": 0.7781702830872564, | |
| "eval_sickr_spearman": 0.7235760070476919, | |
| "eval_stsb_spearman": 0.832764559126821, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_avg_sts": 0.7788742472264717, | |
| "eval_sickr_spearman": 0.7235008864050426, | |
| "eval_stsb_spearman": 0.8342476080479008, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_avg_sts": 0.7786540883853343, | |
| "eval_sickr_spearman": 0.7228896906392426, | |
| "eval_stsb_spearman": 0.8344184861314262, | |
| "step": 3375 | |
| }, | |
| { | |
| "electra_acc": 0.8185, | |
| "electra_fix_acc": 0.9468, | |
| "electra_rep_acc": 0.4365, | |
| "epoch": 0.45, | |
| "learning_rate": 1.104057340330219e-06, | |
| "loss": 0.0034, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7518, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_avg_sts": 0.7785390972296262, | |
| "eval_sickr_spearman": 0.7228380572051966, | |
| "eval_stsb_spearman": 0.8342401372540559, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_avg_sts": 0.778413022922875, | |
| "eval_sickr_spearman": 0.7219714320419488, | |
| "eval_stsb_spearman": 0.8348546138038011, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_avg_sts": 0.778801601085773, | |
| "eval_sickr_spearman": 0.722748959512029, | |
| "eval_stsb_spearman": 0.8348542426595171, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_avg_sts": 0.7782529644249387, | |
| "eval_sickr_spearman": 0.7217000082877221, | |
| "eval_stsb_spearman": 0.8348059205621555, | |
| "step": 3875 | |
| }, | |
| { | |
| "electra_acc": 0.8202, | |
| "electra_fix_acc": 0.9468, | |
| "electra_rep_acc": 0.4426, | |
| "epoch": 0.51, | |
| "learning_rate": 9.760655318059644e-07, | |
| "loss": 0.0033, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7559, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_avg_sts": 0.7782394657347366, | |
| "eval_sickr_spearman": 0.7217301237883238, | |
| "eval_stsb_spearman": 0.8347488076811493, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_avg_sts": 0.7782895079814026, | |
| "eval_sickr_spearman": 0.7220120663537655, | |
| "eval_stsb_spearman": 0.8345669496090398, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_avg_sts": 0.778080439519264, | |
| "eval_sickr_spearman": 0.7216004398144409, | |
| "eval_stsb_spearman": 0.834560439224087, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_avg_sts": 0.7782284276808015, | |
| "eval_sickr_spearman": 0.7216616794687745, | |
| "eval_stsb_spearman": 0.8347951758928285, | |
| "step": 4375 | |
| }, | |
| { | |
| "electra_acc": 0.8217, | |
| "electra_fix_acc": 0.9477, | |
| "electra_rep_acc": 0.4458, | |
| "epoch": 0.58, | |
| "learning_rate": 8.480737232817099e-07, | |
| "loss": 0.0032, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7596, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_avg_sts": 0.7781400153641456, | |
| "eval_sickr_spearman": 0.7214844927355691, | |
| "eval_stsb_spearman": 0.8347955379927221, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_avg_sts": 0.7783651714396398, | |
| "eval_sickr_spearman": 0.721677865949959, | |
| "eval_stsb_spearman": 0.8350524769293207, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_avg_sts": 0.7779810461862591, | |
| "eval_sickr_spearman": 0.7215376151337598, | |
| "eval_stsb_spearman": 0.8344244772387583, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_avg_sts": 0.7774863632137369, | |
| "eval_sickr_spearman": 0.7212506225972007, | |
| "eval_stsb_spearman": 0.833722103830273, | |
| "step": 4875 | |
| }, | |
| { | |
| "electra_acc": 0.8222, | |
| "electra_fix_acc": 0.9476, | |
| "electra_rep_acc": 0.4478, | |
| "epoch": 0.64, | |
| "learning_rate": 7.200819147574555e-07, | |
| "loss": 0.0033, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7614, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_avg_sts": 0.7780441928626822, | |
| "eval_sickr_spearman": 0.7222703776172996, | |
| "eval_stsb_spearman": 0.833818008108065, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_avg_sts": 0.7772624437857663, | |
| "eval_sickr_spearman": 0.7211198886445525, | |
| "eval_stsb_spearman": 0.8334049989269801, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_avg_sts": 0.777433289264948, | |
| "eval_sickr_spearman": 0.7208792047952461, | |
| "eval_stsb_spearman": 0.83398737373465, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_avg_sts": 0.778471716959233, | |
| "eval_sickr_spearman": 0.7219667730251094, | |
| "eval_stsb_spearman": 0.8349766608933566, | |
| "step": 5375 | |
| }, | |
| { | |
| "electra_acc": 0.824, | |
| "electra_fix_acc": 0.9479, | |
| "electra_rep_acc": 0.4538, | |
| "epoch": 0.7, | |
| "learning_rate": 5.920901062332011e-07, | |
| "loss": 0.0033, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7634, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_avg_sts": 0.7783835165868276, | |
| "eval_sickr_spearman": 0.7220774366828226, | |
| "eval_stsb_spearman": 0.8346895964908326, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_avg_sts": 0.7777215174417764, | |
| "eval_sickr_spearman": 0.7211491395853283, | |
| "eval_stsb_spearman": 0.8342938952982245, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_avg_sts": 0.7783006770868415, | |
| "eval_sickr_spearman": 0.7218946783418506, | |
| "eval_stsb_spearman": 0.8347066758318323, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_avg_sts": 0.7786525912567952, | |
| "eval_sickr_spearman": 0.7218885303608666, | |
| "eval_stsb_spearman": 0.8354166521527239, | |
| "step": 5875 | |
| }, | |
| { | |
| "electra_acc": 0.8245, | |
| "electra_fix_acc": 0.9479, | |
| "electra_rep_acc": 0.4557, | |
| "epoch": 0.77, | |
| "learning_rate": 4.640982977089466e-07, | |
| "loss": 0.0033, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.7639, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_avg_sts": 0.7786598191346372, | |
| "eval_sickr_spearman": 0.7214591323140099, | |
| "eval_stsb_spearman": 0.8358605059552645, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_avg_sts": 0.7785391387872032, | |
| "eval_sickr_spearman": 0.7212359798167284, | |
| "eval_stsb_spearman": 0.8358422977576779, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_avg_sts": 0.7780962443354171, | |
| "eval_sickr_spearman": 0.7207366965172793, | |
| "eval_stsb_spearman": 0.835455792153555, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_avg_sts": 0.7787170644410477, | |
| "eval_sickr_spearman": 0.7214541370794603, | |
| "eval_stsb_spearman": 0.8359799918026352, | |
| "step": 6375 | |
| }, | |
| { | |
| "electra_acc": 0.8249, | |
| "electra_fix_acc": 0.9477, | |
| "electra_rep_acc": 0.458, | |
| "epoch": 0.83, | |
| "learning_rate": 3.361064891846921e-07, | |
| "loss": 0.0034, | |
| "neg_sim": -0.0068, | |
| "pos_sim": 0.765, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_avg_sts": 0.7787359930993909, | |
| "eval_sickr_spearman": 0.7215321395881958, | |
| "eval_stsb_spearman": 0.835939846610586, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_avg_sts": 0.778662230635321, | |
| "eval_sickr_spearman": 0.7217551960232744, | |
| "eval_stsb_spearman": 0.8355692652473675, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_avg_sts": 0.7788844288300487, | |
| "eval_sickr_spearman": 0.7221871330355174, | |
| "eval_stsb_spearman": 0.8355817246245801, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_avg_sts": 0.7789522767655956, | |
| "eval_sickr_spearman": 0.722305728507958, | |
| "eval_stsb_spearman": 0.8355988250232333, | |
| "step": 6875 | |
| }, | |
| { | |
| "electra_acc": 0.8251, | |
| "electra_fix_acc": 0.9476, | |
| "electra_rep_acc": 0.4589, | |
| "epoch": 0.9, | |
| "learning_rate": 2.0811468066043772e-07, | |
| "loss": 0.0034, | |
| "neg_sim": -0.0069, | |
| "pos_sim": 0.7655, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_avg_sts": 0.7787617595098588, | |
| "eval_sickr_spearman": 0.721966628931805, | |
| "eval_stsb_spearman": 0.8355568900879128, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_avg_sts": 0.7786478565950568, | |
| "eval_sickr_spearman": 0.7216480386359659, | |
| "eval_stsb_spearman": 0.8356476745541477, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_avg_sts": 0.7787981847358625, | |
| "eval_sickr_spearman": 0.7219965523080009, | |
| "eval_stsb_spearman": 0.8355998171637242, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_avg_sts": 0.7788565954562792, | |
| "eval_sickr_spearman": 0.7221892050558691, | |
| "eval_stsb_spearman": 0.8355239858566892, | |
| "step": 7375 | |
| }, | |
| { | |
| "electra_acc": 0.8254, | |
| "electra_fix_acc": 0.9477, | |
| "electra_rep_acc": 0.4607, | |
| "epoch": 0.96, | |
| "learning_rate": 8.012287213618328e-08, | |
| "loss": 0.0032, | |
| "neg_sim": -0.0069, | |
| "pos_sim": 0.7665, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_avg_sts": 0.7788488501171182, | |
| "eval_sickr_spearman": 0.7221623076390639, | |
| "eval_stsb_spearman": 0.8355353925951726, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_avg_sts": 0.7788989946596425, | |
| "eval_sickr_spearman": 0.7222944411991201, | |
| "eval_stsb_spearman": 0.8355035481201649, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_avg_sts": 0.7788451325075824, | |
| "eval_sickr_spearman": 0.7221886767137534, | |
| "eval_stsb_spearman": 0.8355015883014113, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 7813, | |
| "train_runtime": 7033.0789, | |
| "train_samples_per_second": 1.111 | |
| } | |
| ], | |
| "max_steps": 7813, | |
| "num_train_epochs": 1, | |
| "total_flos": 285611882411596800, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |