SykoLLM-V6.0 / trainer_state.json
SykoSLM's picture
Upload folder using huggingface_hub
470d909 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.35,
"eval_steps": 500,
"global_step": 2800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00125,
"grad_norm": 0.40701737999916077,
"learning_rate": 6.417e-06,
"loss": 1.9589118957519531,
"step": 10
},
{
"epoch": 0.0025,
"grad_norm": 0.3704953193664551,
"learning_rate": 1.3547e-05,
"loss": 1.879218864440918,
"step": 20
},
{
"epoch": 0.00375,
"grad_norm": 0.34090375900268555,
"learning_rate": 2.0677e-05,
"loss": 1.8871658325195313,
"step": 30
},
{
"epoch": 0.005,
"grad_norm": 0.33982428908348083,
"learning_rate": 2.7807e-05,
"loss": 1.8348798751831055,
"step": 40
},
{
"epoch": 0.00625,
"grad_norm": 0.3448389172554016,
"learning_rate": 3.4937e-05,
"loss": 1.8976055145263673,
"step": 50
},
{
"epoch": 0.0075,
"grad_norm": 0.3351344168186188,
"learning_rate": 4.2066999999999996e-05,
"loss": 1.8488676071166992,
"step": 60
},
{
"epoch": 0.00875,
"grad_norm": 0.33170202374458313,
"learning_rate": 4.9197e-05,
"loss": 1.8325592041015626,
"step": 70
},
{
"epoch": 0.01,
"grad_norm": 0.34600478410720825,
"learning_rate": 5.6327e-05,
"loss": 1.8475696563720703,
"step": 80
},
{
"epoch": 0.01125,
"grad_norm": 0.34344804286956787,
"learning_rate": 6.3457e-05,
"loss": 1.8463781356811524,
"step": 90
},
{
"epoch": 0.0125,
"grad_norm": 0.32425570487976074,
"learning_rate": 7.0587e-05,
"loss": 1.8811756134033204,
"step": 100
},
{
"epoch": 0.01375,
"grad_norm": 0.33838146924972534,
"learning_rate": 7.7717e-05,
"loss": 1.8498527526855468,
"step": 110
},
{
"epoch": 0.015,
"grad_norm": 0.34978190064430237,
"learning_rate": 8.4847e-05,
"loss": 1.7197338104248048,
"step": 120
},
{
"epoch": 0.01625,
"grad_norm": 0.3554218113422394,
"learning_rate": 9.1977e-05,
"loss": 1.7990310668945313,
"step": 130
},
{
"epoch": 0.0175,
"grad_norm": 0.3349857032299042,
"learning_rate": 9.910699999999998e-05,
"loss": 1.8458877563476563,
"step": 140
},
{
"epoch": 0.01875,
"grad_norm": 0.3333263099193573,
"learning_rate": 0.00010623699999999999,
"loss": 1.8082691192626954,
"step": 150
},
{
"epoch": 0.02,
"grad_norm": 0.3492045998573303,
"learning_rate": 0.000113367,
"loss": 1.7753177642822267,
"step": 160
},
{
"epoch": 0.02125,
"grad_norm": 0.33766260743141174,
"learning_rate": 0.000120497,
"loss": 1.7588382720947267,
"step": 170
},
{
"epoch": 0.0225,
"grad_norm": 0.3680027723312378,
"learning_rate": 0.000127627,
"loss": 1.7494930267333983,
"step": 180
},
{
"epoch": 0.02375,
"grad_norm": 0.35260000824928284,
"learning_rate": 0.000134757,
"loss": 1.758560562133789,
"step": 190
},
{
"epoch": 0.025,
"grad_norm": 0.3592912256717682,
"learning_rate": 0.000141887,
"loss": 1.8017724990844726,
"step": 200
},
{
"epoch": 0.02625,
"grad_norm": 0.34770476818084717,
"learning_rate": 0.00014259953155930407,
"loss": 1.8061519622802735,
"step": 210
},
{
"epoch": 0.0275,
"grad_norm": 0.358970582485199,
"learning_rate": 0.00014259791226603537,
"loss": 1.8515422821044922,
"step": 220
},
{
"epoch": 0.02875,
"grad_norm": 0.34490638971328735,
"learning_rate": 0.00014259513636323773,
"loss": 1.8080307006835938,
"step": 230
},
{
"epoch": 0.03,
"grad_norm": 0.3587310016155243,
"learning_rate": 0.00014259120389594238,
"loss": 1.8180580139160156,
"step": 240
},
{
"epoch": 0.03125,
"grad_norm": 0.35348573327064514,
"learning_rate": 0.0001425861149279427,
"loss": 1.822945785522461,
"step": 250
},
{
"epoch": 0.0325,
"grad_norm": 0.3408539891242981,
"learning_rate": 0.00014257986954179292,
"loss": 1.804990577697754,
"step": 260
},
{
"epoch": 0.03375,
"grad_norm": 0.35097193717956543,
"learning_rate": 0.00014257246783880696,
"loss": 1.8341880798339845,
"step": 270
},
{
"epoch": 0.035,
"grad_norm": 0.3467462956905365,
"learning_rate": 0.00014256390993905687,
"loss": 1.7296785354614257,
"step": 280
},
{
"epoch": 0.03625,
"grad_norm": 0.3492400050163269,
"learning_rate": 0.00014255419598137062,
"loss": 1.8266151428222657,
"step": 290
},
{
"epoch": 0.0375,
"grad_norm": 0.3718615472316742,
"learning_rate": 0.00014254332612333005,
"loss": 1.7514339447021485,
"step": 300
},
{
"epoch": 0.03875,
"grad_norm": 0.3476354479789734,
"learning_rate": 0.00014253130054126827,
"loss": 1.8226016998291015,
"step": 310
},
{
"epoch": 0.04,
"grad_norm": 0.34655508399009705,
"learning_rate": 0.00014251811943026674,
"loss": 1.8513336181640625,
"step": 320
},
{
"epoch": 0.04125,
"grad_norm": 0.3519170880317688,
"learning_rate": 0.00014250378300415223,
"loss": 1.864480972290039,
"step": 330
},
{
"epoch": 0.0425,
"grad_norm": 0.3491443395614624,
"learning_rate": 0.00014248829149549318,
"loss": 1.8030773162841798,
"step": 340
},
{
"epoch": 0.04375,
"grad_norm": 0.3646671175956726,
"learning_rate": 0.00014247164515559605,
"loss": 1.782710647583008,
"step": 350
},
{
"epoch": 0.045,
"grad_norm": 0.3525862395763397,
"learning_rate": 0.00014245384425450123,
"loss": 1.8301689147949218,
"step": 360
},
{
"epoch": 0.04625,
"grad_norm": 0.3430674970149994,
"learning_rate": 0.00014243488908097866,
"loss": 1.7636734008789063,
"step": 370
},
{
"epoch": 0.0475,
"grad_norm": 0.3655545115470886,
"learning_rate": 0.00014241477994252308,
"loss": 1.8431385040283204,
"step": 380
},
{
"epoch": 0.04875,
"grad_norm": 0.35655322670936584,
"learning_rate": 0.00014239351716534906,
"loss": 1.8405876159667969,
"step": 390
},
{
"epoch": 0.05,
"grad_norm": 0.3450303077697754,
"learning_rate": 0.00014237110109438587,
"loss": 1.7880744934082031,
"step": 400
},
{
"epoch": 0.05125,
"grad_norm": 0.36362725496292114,
"learning_rate": 0.0001423475320932716,
"loss": 1.803448486328125,
"step": 410
},
{
"epoch": 0.0525,
"grad_norm": 0.3608654737472534,
"learning_rate": 0.0001423228105443475,
"loss": 1.7959218978881837,
"step": 420
},
{
"epoch": 0.05375,
"grad_norm": 0.3524814248085022,
"learning_rate": 0.00014229693684865167,
"loss": 1.8105106353759766,
"step": 430
},
{
"epoch": 0.055,
"grad_norm": 0.35871171951293945,
"learning_rate": 0.0001422699114259126,
"loss": 1.7514846801757813,
"step": 440
},
{
"epoch": 0.05625,
"grad_norm": 0.3381369709968567,
"learning_rate": 0.00014224173471454223,
"loss": 1.811713981628418,
"step": 450
},
{
"epoch": 0.0575,
"grad_norm": 0.3746880292892456,
"learning_rate": 0.00014221240717162908,
"loss": 1.7895519256591796,
"step": 460
},
{
"epoch": 0.05875,
"grad_norm": 0.35921189188957214,
"learning_rate": 0.00014218192927293062,
"loss": 1.7877384185791017,
"step": 470
},
{
"epoch": 0.06,
"grad_norm": 0.3727467656135559,
"learning_rate": 0.00014215030151286563,
"loss": 1.8092086791992188,
"step": 480
},
{
"epoch": 0.06125,
"grad_norm": 0.36004638671875,
"learning_rate": 0.00014211752440450624,
"loss": 1.845526123046875,
"step": 490
},
{
"epoch": 0.0625,
"grad_norm": 0.34500977396965027,
"learning_rate": 0.00014208359847956947,
"loss": 1.793890380859375,
"step": 500
},
{
"epoch": 0.06375,
"grad_norm": 0.3571811020374298,
"learning_rate": 0.00014204852428840873,
"loss": 1.8021648406982422,
"step": 510
},
{
"epoch": 0.065,
"grad_norm": 0.3511386513710022,
"learning_rate": 0.0001420123024000048,
"loss": 1.7810476303100586,
"step": 520
},
{
"epoch": 0.06625,
"grad_norm": 0.3544309139251709,
"learning_rate": 0.00014197493340195673,
"loss": 1.782750701904297,
"step": 530
},
{
"epoch": 0.0675,
"grad_norm": 0.35211437940597534,
"learning_rate": 0.00014193641790047207,
"loss": 1.8397369384765625,
"step": 540
},
{
"epoch": 0.06875,
"grad_norm": 0.3561457097530365,
"learning_rate": 0.00014189675652035737,
"loss": 1.806086540222168,
"step": 550
},
{
"epoch": 0.07,
"grad_norm": 0.3514038026332855,
"learning_rate": 0.0001418559499050077,
"loss": 1.7963085174560547,
"step": 560
},
{
"epoch": 0.07125,
"grad_norm": 0.35221120715141296,
"learning_rate": 0.00014181399871639652,
"loss": 1.777400016784668,
"step": 570
},
{
"epoch": 0.0725,
"grad_norm": 0.34728357195854187,
"learning_rate": 0.00014177090363506466,
"loss": 1.7832159042358398,
"step": 580
},
{
"epoch": 0.07375,
"grad_norm": 0.35810062289237976,
"learning_rate": 0.00014172666536010946,
"loss": 1.7859878540039062,
"step": 590
},
{
"epoch": 0.075,
"grad_norm": 0.3402475118637085,
"learning_rate": 0.00014168128460917344,
"loss": 1.8559268951416015,
"step": 600
},
{
"epoch": 0.07625,
"grad_norm": 0.36799490451812744,
"learning_rate": 0.00014163476211843254,
"loss": 1.8264755249023437,
"step": 610
},
{
"epoch": 0.0775,
"grad_norm": 0.3646862804889679,
"learning_rate": 0.00014158709864258424,
"loss": 1.800428581237793,
"step": 620
},
{
"epoch": 0.07875,
"grad_norm": 0.37956395745277405,
"learning_rate": 0.00014153829495483538,
"loss": 1.7767526626586914,
"step": 630
},
{
"epoch": 0.08,
"grad_norm": 0.3566032648086548,
"learning_rate": 0.00014148835184688949,
"loss": 1.8472091674804687,
"step": 640
},
{
"epoch": 0.08125,
"grad_norm": 0.333779513835907,
"learning_rate": 0.000141437270128934,
"loss": 1.8140777587890624,
"step": 650
},
{
"epoch": 0.0825,
"grad_norm": 0.3429010212421417,
"learning_rate": 0.0001413850506296272,
"loss": 1.8366750717163085,
"step": 660
},
{
"epoch": 0.08375,
"grad_norm": 0.3753111660480499,
"learning_rate": 0.00014133169419608456,
"loss": 1.760198211669922,
"step": 670
},
{
"epoch": 0.085,
"grad_norm": 0.35503339767456055,
"learning_rate": 0.0001412772016938653,
"loss": 1.8173086166381835,
"step": 680
},
{
"epoch": 0.08625,
"grad_norm": 0.358216792345047,
"learning_rate": 0.0001412215740069581,
"loss": 1.7937744140625,
"step": 690
},
{
"epoch": 0.0875,
"grad_norm": 0.3600156605243683,
"learning_rate": 0.00014116481203776677,
"loss": 1.7986185073852539,
"step": 700
},
{
"epoch": 0.08875,
"grad_norm": 0.3507816195487976,
"learning_rate": 0.00014110691670709584,
"loss": 1.7555866241455078,
"step": 710
},
{
"epoch": 0.09,
"grad_norm": 0.35459256172180176,
"learning_rate": 0.00014104788895413529,
"loss": 1.795433807373047,
"step": 720
},
{
"epoch": 0.09125,
"grad_norm": 0.35286569595336914,
"learning_rate": 0.00014098772973644564,
"loss": 1.820347213745117,
"step": 730
},
{
"epoch": 0.0925,
"grad_norm": 0.3857751786708832,
"learning_rate": 0.00014092644002994218,
"loss": 1.8153291702270509,
"step": 740
},
{
"epoch": 0.09375,
"grad_norm": 0.3553074598312378,
"learning_rate": 0.00014086402082887924,
"loss": 1.8413051605224608,
"step": 750
},
{
"epoch": 0.095,
"grad_norm": 0.35642898082733154,
"learning_rate": 0.0001408004731458341,
"loss": 1.7815227508544922,
"step": 760
},
{
"epoch": 0.09625,
"grad_norm": 0.37263238430023193,
"learning_rate": 0.00014073579801169043,
"loss": 1.8360301971435546,
"step": 770
},
{
"epoch": 0.0975,
"grad_norm": 0.37507593631744385,
"learning_rate": 0.00014066999647562167,
"loss": 1.8166229248046875,
"step": 780
},
{
"epoch": 0.09875,
"grad_norm": 0.3496163487434387,
"learning_rate": 0.00014060306960507398,
"loss": 1.7876134872436524,
"step": 790
},
{
"epoch": 0.1,
"grad_norm": 0.350668340921402,
"learning_rate": 0.000140535018485749,
"loss": 1.8262884140014648,
"step": 800
},
{
"epoch": 0.10125,
"grad_norm": 0.36257749795913696,
"learning_rate": 0.00014046584422158602,
"loss": 1.7791305541992188,
"step": 810
},
{
"epoch": 0.1025,
"grad_norm": 0.357570081949234,
"learning_rate": 0.00014039554793474442,
"loss": 1.8329212188720703,
"step": 820
},
{
"epoch": 0.10375,
"grad_norm": 0.354640930891037,
"learning_rate": 0.00014032413076558507,
"loss": 1.7825984954833984,
"step": 830
},
{
"epoch": 0.105,
"grad_norm": 0.35969364643096924,
"learning_rate": 0.00014025159387265215,
"loss": 1.7961544036865233,
"step": 840
},
{
"epoch": 0.10625,
"grad_norm": 0.3408399224281311,
"learning_rate": 0.00014017793843265416,
"loss": 1.8031917572021485,
"step": 850
},
{
"epoch": 0.1075,
"grad_norm": 0.3505636751651764,
"learning_rate": 0.00014010316564044495,
"loss": 1.8270240783691407,
"step": 860
},
{
"epoch": 0.10875,
"grad_norm": 0.3612024784088135,
"learning_rate": 0.00014002727670900427,
"loss": 1.8037662506103516,
"step": 870
},
{
"epoch": 0.11,
"grad_norm": 0.3611273467540741,
"learning_rate": 0.00013995027286941813,
"loss": 1.7805574417114258,
"step": 880
},
{
"epoch": 0.11125,
"grad_norm": 0.370518296957016,
"learning_rate": 0.00013987215537085876,
"loss": 1.83743896484375,
"step": 890
},
{
"epoch": 0.1125,
"grad_norm": 0.3627995550632477,
"learning_rate": 0.00013979292548056446,
"loss": 1.8568729400634765,
"step": 900
},
{
"epoch": 0.11375,
"grad_norm": 0.33446118235588074,
"learning_rate": 0.00013971258448381896,
"loss": 1.8121458053588868,
"step": 910
},
{
"epoch": 0.115,
"grad_norm": 0.35702356696128845,
"learning_rate": 0.00013963113368393058,
"loss": 1.8272817611694336,
"step": 920
},
{
"epoch": 0.11625,
"grad_norm": 0.35480058193206787,
"learning_rate": 0.00013954857440221107,
"loss": 1.8286819458007812,
"step": 930
},
{
"epoch": 0.1175,
"grad_norm": 0.33891281485557556,
"learning_rate": 0.00013946490797795425,
"loss": 1.7881786346435546,
"step": 940
},
{
"epoch": 0.11875,
"grad_norm": 0.34998786449432373,
"learning_rate": 0.00013938013576841426,
"loss": 1.8192798614501953,
"step": 950
},
{
"epoch": 0.12,
"grad_norm": 0.36356785893440247,
"learning_rate": 0.0001392942591487834,
"loss": 1.8080211639404298,
"step": 960
},
{
"epoch": 0.12125,
"grad_norm": 0.3536245822906494,
"learning_rate": 0.00013920727951217003,
"loss": 1.7745712280273438,
"step": 970
},
{
"epoch": 0.1225,
"grad_norm": 0.35819944739341736,
"learning_rate": 0.00013911919826957588,
"loss": 1.8335809707641602,
"step": 980
},
{
"epoch": 0.12375,
"grad_norm": 0.3673238754272461,
"learning_rate": 0.0001390300168498732,
"loss": 1.7918657302856444,
"step": 990
},
{
"epoch": 0.125,
"grad_norm": 0.37633419036865234,
"learning_rate": 0.0001389397366997814,
"loss": 1.7912788391113281,
"step": 1000
},
{
"epoch": 0.12625,
"grad_norm": 0.36260703206062317,
"learning_rate": 0.00013884835928384387,
"loss": 1.7769220352172852,
"step": 1010
},
{
"epoch": 0.1275,
"grad_norm": 0.3502698242664337,
"learning_rate": 0.00013875588608440397,
"loss": 1.8571086883544923,
"step": 1020
},
{
"epoch": 0.12875,
"grad_norm": 0.37244319915771484,
"learning_rate": 0.0001386623186015812,
"loss": 1.7873695373535157,
"step": 1030
},
{
"epoch": 0.13,
"grad_norm": 0.36906760931015015,
"learning_rate": 0.00013856765835324657,
"loss": 1.7982921600341797,
"step": 1040
},
{
"epoch": 0.13125,
"grad_norm": 0.3458193838596344,
"learning_rate": 0.0001384719068749984,
"loss": 1.896946907043457,
"step": 1050
},
{
"epoch": 0.1325,
"grad_norm": 0.3625653088092804,
"learning_rate": 0.00013837506572013695,
"loss": 1.8590087890625,
"step": 1060
},
{
"epoch": 0.13375,
"grad_norm": 0.37704798579216003,
"learning_rate": 0.00013827713645963959,
"loss": 1.7953170776367187,
"step": 1070
},
{
"epoch": 0.135,
"grad_norm": 0.35103756189346313,
"learning_rate": 0.00013817812068213505,
"loss": 1.864565658569336,
"step": 1080
},
{
"epoch": 0.13625,
"grad_norm": 0.39145445823669434,
"learning_rate": 0.0001380780199938779,
"loss": 1.787282371520996,
"step": 1090
},
{
"epoch": 0.1375,
"grad_norm": 0.3810483515262604,
"learning_rate": 0.00013797683601872218,
"loss": 1.8461406707763672,
"step": 1100
},
{
"epoch": 0.13875,
"grad_norm": 0.36001554131507874,
"learning_rate": 0.00013787457039809542,
"loss": 1.7846809387207032,
"step": 1110
},
{
"epoch": 0.14,
"grad_norm": 0.36254000663757324,
"learning_rate": 0.0001377712247909717,
"loss": 1.8589000701904297,
"step": 1120
},
{
"epoch": 0.14125,
"grad_norm": 0.3535791337490082,
"learning_rate": 0.00013766680087384488,
"loss": 1.790989875793457,
"step": 1130
},
{
"epoch": 0.1425,
"grad_norm": 0.36819183826446533,
"learning_rate": 0.00013756130034070147,
"loss": 1.8115760803222656,
"step": 1140
},
{
"epoch": 0.14375,
"grad_norm": 0.35042834281921387,
"learning_rate": 0.00013745472490299298,
"loss": 1.7872331619262696,
"step": 1150
},
{
"epoch": 0.145,
"grad_norm": 0.36452701687812805,
"learning_rate": 0.0001373470762896083,
"loss": 1.8083602905273437,
"step": 1160
},
{
"epoch": 0.14625,
"grad_norm": 0.35632047057151794,
"learning_rate": 0.00013723835624684556,
"loss": 1.8238039016723633,
"step": 1170
},
{
"epoch": 0.1475,
"grad_norm": 0.36330121755599976,
"learning_rate": 0.00013712856653838384,
"loss": 1.8468303680419922,
"step": 1180
},
{
"epoch": 0.14875,
"grad_norm": 0.37948107719421387,
"learning_rate": 0.0001370177089452546,
"loss": 1.7772663116455079,
"step": 1190
},
{
"epoch": 0.15,
"grad_norm": 0.3759608268737793,
"learning_rate": 0.0001369057852658127,
"loss": 1.793960952758789,
"step": 1200
},
{
"epoch": 0.15125,
"grad_norm": 0.3672516644001007,
"learning_rate": 0.00013679279731570733,
"loss": 1.7799537658691407,
"step": 1210
},
{
"epoch": 0.1525,
"grad_norm": 0.3496241569519043,
"learning_rate": 0.00013667874692785244,
"loss": 1.7861103057861327,
"step": 1220
},
{
"epoch": 0.15375,
"grad_norm": 0.3461642265319824,
"learning_rate": 0.00013656363595239708,
"loss": 1.8481361389160156,
"step": 1230
},
{
"epoch": 0.155,
"grad_norm": 0.33858028054237366,
"learning_rate": 0.0001364474662566954,
"loss": 1.77642822265625,
"step": 1240
},
{
"epoch": 0.15625,
"grad_norm": 0.3424132764339447,
"learning_rate": 0.00013633023972527632,
"loss": 1.7893180847167969,
"step": 1250
},
{
"epoch": 0.1575,
"grad_norm": 0.35095998644828796,
"learning_rate": 0.00013621195825981293,
"loss": 1.7366466522216797,
"step": 1260
},
{
"epoch": 0.15875,
"grad_norm": 0.36417317390441895,
"learning_rate": 0.00013609262377909176,
"loss": 1.839132308959961,
"step": 1270
},
{
"epoch": 0.16,
"grad_norm": 0.3565835654735565,
"learning_rate": 0.00013597223821898145,
"loss": 1.757269287109375,
"step": 1280
},
{
"epoch": 0.16125,
"grad_norm": 0.34676891565322876,
"learning_rate": 0.00013585080353240158,
"loss": 1.781381607055664,
"step": 1290
},
{
"epoch": 0.1625,
"grad_norm": 0.3492533564567566,
"learning_rate": 0.00013572832168929085,
"loss": 1.8004392623901366,
"step": 1300
},
{
"epoch": 0.16375,
"grad_norm": 0.33528923988342285,
"learning_rate": 0.0001356047946765751,
"loss": 1.7787307739257812,
"step": 1310
},
{
"epoch": 0.165,
"grad_norm": 0.35009509325027466,
"learning_rate": 0.00013548022449813522,
"loss": 1.7703327178955077,
"step": 1320
},
{
"epoch": 0.16625,
"grad_norm": 0.38126665353775024,
"learning_rate": 0.00013535461317477446,
"loss": 1.8216169357299805,
"step": 1330
},
{
"epoch": 0.1675,
"grad_norm": 0.3653838038444519,
"learning_rate": 0.00013522796274418575,
"loss": 1.784686279296875,
"step": 1340
},
{
"epoch": 0.16875,
"grad_norm": 0.35842376947402954,
"learning_rate": 0.00013510027526091872,
"loss": 1.818338394165039,
"step": 1350
},
{
"epoch": 0.17,
"grad_norm": 0.3575061559677124,
"learning_rate": 0.00013497155279634617,
"loss": 1.8177734375,
"step": 1360
},
{
"epoch": 0.17125,
"grad_norm": 0.36351051926612854,
"learning_rate": 0.00013484179743863064,
"loss": 1.8408927917480469,
"step": 1370
},
{
"epoch": 0.1725,
"grad_norm": 0.37017935514450073,
"learning_rate": 0.0001347110112926905,
"loss": 1.8088676452636718,
"step": 1380
},
{
"epoch": 0.17375,
"grad_norm": 0.35998839139938354,
"learning_rate": 0.00013457919648016573,
"loss": 1.8451946258544922,
"step": 1390
},
{
"epoch": 0.175,
"grad_norm": 0.36173009872436523,
"learning_rate": 0.0001344463551393836,
"loss": 1.7784915924072267,
"step": 1400
},
{
"epoch": 0.17625,
"grad_norm": 0.3683062493801117,
"learning_rate": 0.00013431248942532385,
"loss": 1.745309829711914,
"step": 1410
},
{
"epoch": 0.1775,
"grad_norm": 0.3488103151321411,
"learning_rate": 0.00013417760150958392,
"loss": 1.793316650390625,
"step": 1420
},
{
"epoch": 0.17875,
"grad_norm": 0.35314610600471497,
"learning_rate": 0.00013404169358034355,
"loss": 1.7867753982543946,
"step": 1430
},
{
"epoch": 0.18,
"grad_norm": 0.3577822744846344,
"learning_rate": 0.0001339047678423294,
"loss": 1.7581512451171875,
"step": 1440
},
{
"epoch": 0.18125,
"grad_norm": 0.3387848436832428,
"learning_rate": 0.00013376682651677918,
"loss": 1.7947473526000977,
"step": 1450
},
{
"epoch": 0.1825,
"grad_norm": 0.3571684658527374,
"learning_rate": 0.00013362787184140572,
"loss": 1.7496719360351562,
"step": 1460
},
{
"epoch": 0.18375,
"grad_norm": 0.3472369313240051,
"learning_rate": 0.0001334879060703606,
"loss": 1.7750968933105469,
"step": 1470
},
{
"epoch": 0.185,
"grad_norm": 0.3559383749961853,
"learning_rate": 0.00013334693147419759,
"loss": 1.8256034851074219,
"step": 1480
},
{
"epoch": 0.18625,
"grad_norm": 0.35892486572265625,
"learning_rate": 0.00013320495033983585,
"loss": 1.7993803024291992,
"step": 1490
},
{
"epoch": 0.1875,
"grad_norm": 0.3679066300392151,
"learning_rate": 0.0001330619649705228,
"loss": 1.8065261840820312,
"step": 1500
},
{
"epoch": 0.18875,
"grad_norm": 0.36252209544181824,
"learning_rate": 0.0001329179776857968,
"loss": 1.8372112274169923,
"step": 1510
},
{
"epoch": 0.19,
"grad_norm": 0.3526136577129364,
"learning_rate": 0.0001327729908214494,
"loss": 1.799185562133789,
"step": 1520
},
{
"epoch": 0.19125,
"grad_norm": 0.3635775148868561,
"learning_rate": 0.0001326270067294877,
"loss": 1.8340118408203125,
"step": 1530
},
{
"epoch": 0.1925,
"grad_norm": 0.36545416712760925,
"learning_rate": 0.00013248002777809586,
"loss": 1.7582477569580077,
"step": 1540
},
{
"epoch": 0.19375,
"grad_norm": 0.37526363134384155,
"learning_rate": 0.00013233205635159695,
"loss": 1.799554443359375,
"step": 1550
},
{
"epoch": 0.195,
"grad_norm": 0.35140055418014526,
"learning_rate": 0.0001321830948504142,
"loss": 1.84625244140625,
"step": 1560
},
{
"epoch": 0.19625,
"grad_norm": 0.3566315770149231,
"learning_rate": 0.0001320331456910319,
"loss": 1.7883316040039063,
"step": 1570
},
{
"epoch": 0.1975,
"grad_norm": 0.35099372267723083,
"learning_rate": 0.0001318822113059565,
"loss": 1.794087028503418,
"step": 1580
},
{
"epoch": 0.19875,
"grad_norm": 0.35940778255462646,
"learning_rate": 0.00013173029414367693,
"loss": 1.7220880508422851,
"step": 1590
},
{
"epoch": 0.2,
"grad_norm": 0.36045801639556885,
"learning_rate": 0.0001315773966686249,
"loss": 1.7802143096923828,
"step": 1600
},
{
"epoch": 0.20125,
"grad_norm": 0.3581635057926178,
"learning_rate": 0.000131423521361135,
"loss": 1.799722671508789,
"step": 1610
},
{
"epoch": 0.2025,
"grad_norm": 0.33708855509757996,
"learning_rate": 0.00013126867071740436,
"loss": 1.8053092956542969,
"step": 1620
},
{
"epoch": 0.20375,
"grad_norm": 0.3750436007976532,
"learning_rate": 0.00013111284724945228,
"loss": 1.8074203491210938,
"step": 1630
},
{
"epoch": 0.205,
"grad_norm": 0.35119321942329407,
"learning_rate": 0.0001309560534850794,
"loss": 1.8175487518310547,
"step": 1640
},
{
"epoch": 0.20625,
"grad_norm": 0.3611745834350586,
"learning_rate": 0.00013079829196782668,
"loss": 1.7702863693237305,
"step": 1650
},
{
"epoch": 0.2075,
"grad_norm": 0.3799806833267212,
"learning_rate": 0.00013063956525693424,
"loss": 1.8235919952392579,
"step": 1660
},
{
"epoch": 0.20875,
"grad_norm": 0.33240807056427,
"learning_rate": 0.0001304798759272997,
"loss": 1.768626594543457,
"step": 1670
},
{
"epoch": 0.21,
"grad_norm": 0.36028313636779785,
"learning_rate": 0.00013031922656943647,
"loss": 1.829296875,
"step": 1680
},
{
"epoch": 0.21125,
"grad_norm": 0.34874534606933594,
"learning_rate": 0.00013015761978943185,
"loss": 1.8018821716308593,
"step": 1690
},
{
"epoch": 0.2125,
"grad_norm": 0.34944280982017517,
"learning_rate": 0.00012999505820890448,
"loss": 1.8226497650146485,
"step": 1700
},
{
"epoch": 0.21375,
"grad_norm": 0.35128575563430786,
"learning_rate": 0.00012983154446496209,
"loss": 1.7741992950439454,
"step": 1710
},
{
"epoch": 0.215,
"grad_norm": 0.3564985692501068,
"learning_rate": 0.0001296670812101586,
"loss": 1.7850433349609376,
"step": 1720
},
{
"epoch": 0.21625,
"grad_norm": 0.3676067292690277,
"learning_rate": 0.000129501671112451,
"loss": 1.8290214538574219,
"step": 1730
},
{
"epoch": 0.2175,
"grad_norm": 0.3726136386394501,
"learning_rate": 0.00012933531685515627,
"loss": 1.7774532318115235,
"step": 1740
},
{
"epoch": 0.21875,
"grad_norm": 0.3493287265300751,
"learning_rate": 0.00012916802113690766,
"loss": 1.7807361602783203,
"step": 1750
},
{
"epoch": 0.22,
"grad_norm": 0.37059202790260315,
"learning_rate": 0.00012899978667161105,
"loss": 1.749721145629883,
"step": 1760
},
{
"epoch": 0.22125,
"grad_norm": 0.356022447347641,
"learning_rate": 0.00012883061618840087,
"loss": 1.8218292236328124,
"step": 1770
},
{
"epoch": 0.2225,
"grad_norm": 0.3568074405193329,
"learning_rate": 0.00012866051243159572,
"loss": 1.8072574615478516,
"step": 1780
},
{
"epoch": 0.22375,
"grad_norm": 0.3749092221260071,
"learning_rate": 0.00012848947816065416,
"loss": 1.8410078048706056,
"step": 1790
},
{
"epoch": 0.225,
"grad_norm": 0.35633665323257446,
"learning_rate": 0.00012831751615012955,
"loss": 1.7817327499389648,
"step": 1800
},
{
"epoch": 0.22625,
"grad_norm": 0.3607875108718872,
"learning_rate": 0.00012814462918962533,
"loss": 1.8118452072143554,
"step": 1810
},
{
"epoch": 0.2275,
"grad_norm": 0.34315699338912964,
"learning_rate": 0.00012797082008374967,
"loss": 1.8008819580078126,
"step": 1820
},
{
"epoch": 0.22875,
"grad_norm": 0.358188658952713,
"learning_rate": 0.00012779609165206992,
"loss": 1.8048545837402343,
"step": 1830
},
{
"epoch": 0.23,
"grad_norm": 0.3641424775123596,
"learning_rate": 0.000127620446729067,
"loss": 1.8129388809204101,
"step": 1840
},
{
"epoch": 0.23125,
"grad_norm": 0.36388713121414185,
"learning_rate": 0.00012744388816408926,
"loss": 1.7981510162353516,
"step": 1850
},
{
"epoch": 0.2325,
"grad_norm": 0.3411344587802887,
"learning_rate": 0.00012726641882130642,
"loss": 1.7846858978271485,
"step": 1860
},
{
"epoch": 0.23375,
"grad_norm": 0.36635443568229675,
"learning_rate": 0.00012708804157966297,
"loss": 1.8334461212158204,
"step": 1870
},
{
"epoch": 0.235,
"grad_norm": 0.3459226191043854,
"learning_rate": 0.00012690875933283154,
"loss": 1.7850067138671875,
"step": 1880
},
{
"epoch": 0.23625,
"grad_norm": 0.3630014657974243,
"learning_rate": 0.00012672857498916595,
"loss": 1.8400045394897462,
"step": 1890
},
{
"epoch": 0.2375,
"grad_norm": 0.3783304691314697,
"learning_rate": 0.000126547491471654,
"loss": 1.7719623565673828,
"step": 1900
},
{
"epoch": 0.23875,
"grad_norm": 0.3790845572948456,
"learning_rate": 0.0001263655117178701,
"loss": 1.8144996643066407,
"step": 1910
},
{
"epoch": 0.24,
"grad_norm": 0.35528555512428284,
"learning_rate": 0.0001261826386799276,
"loss": 1.797579002380371,
"step": 1920
},
{
"epoch": 0.24125,
"grad_norm": 0.3462880253791809,
"learning_rate": 0.00012599887532443088,
"loss": 1.7669387817382813,
"step": 1930
},
{
"epoch": 0.2425,
"grad_norm": 0.35499900579452515,
"learning_rate": 0.00012581422463242716,
"loss": 1.782514762878418,
"step": 1940
},
{
"epoch": 0.24375,
"grad_norm": 0.35548484325408936,
"learning_rate": 0.00012562868959935835,
"loss": 1.7927711486816407,
"step": 1950
},
{
"epoch": 0.245,
"grad_norm": 0.36208584904670715,
"learning_rate": 0.00012544227323501222,
"loss": 1.8539527893066405,
"step": 1960
},
{
"epoch": 0.24625,
"grad_norm": 0.3629232347011566,
"learning_rate": 0.0001252549785634738,
"loss": 1.7535400390625,
"step": 1970
},
{
"epoch": 0.2475,
"grad_norm": 0.33926820755004883,
"learning_rate": 0.000125066808623076,
"loss": 1.7788131713867188,
"step": 1980
},
{
"epoch": 0.24875,
"grad_norm": 0.3651394546031952,
"learning_rate": 0.00012487776646635072,
"loss": 1.8248186111450195,
"step": 1990
},
{
"epoch": 0.25,
"grad_norm": 0.35856956243515015,
"learning_rate": 0.00012468785515997905,
"loss": 1.7728294372558593,
"step": 2000
},
{
"epoch": 0.25125,
"grad_norm": 0.36707815527915955,
"learning_rate": 0.0001244970777847416,
"loss": 1.797306442260742,
"step": 2010
},
{
"epoch": 0.2525,
"grad_norm": 0.37768349051475525,
"learning_rate": 0.00012430543743546853,
"loss": 1.8138954162597656,
"step": 2020
},
{
"epoch": 0.25375,
"grad_norm": 0.3719421625137329,
"learning_rate": 0.00012411293722098938,
"loss": 1.8046173095703124,
"step": 2030
},
{
"epoch": 0.255,
"grad_norm": 0.35382720828056335,
"learning_rate": 0.00012391958026408258,
"loss": 1.765408706665039,
"step": 2040
},
{
"epoch": 0.25625,
"grad_norm": 0.3717374801635742,
"learning_rate": 0.00012372536970142481,
"loss": 1.794291877746582,
"step": 2050
},
{
"epoch": 0.2575,
"grad_norm": 0.37810182571411133,
"learning_rate": 0.0001235303086835401,
"loss": 1.7855905532836913,
"step": 2060
},
{
"epoch": 0.25875,
"grad_norm": 0.34465938806533813,
"learning_rate": 0.00012333440037474877,
"loss": 1.7502609252929688,
"step": 2070
},
{
"epoch": 0.26,
"grad_norm": 0.3537978529930115,
"learning_rate": 0.0001231376479531161,
"loss": 1.8433588027954102,
"step": 2080
},
{
"epoch": 0.26125,
"grad_norm": 0.3481179475784302,
"learning_rate": 0.00012294005461040066,
"loss": 1.778417205810547,
"step": 2090
},
{
"epoch": 0.2625,
"grad_norm": 0.36712074279785156,
"learning_rate": 0.00012274162355200264,
"loss": 1.8297000885009767,
"step": 2100
},
{
"epoch": 0.26375,
"grad_norm": 0.36218199133872986,
"learning_rate": 0.0001225423579969119,
"loss": 1.8048271179199218,
"step": 2110
},
{
"epoch": 0.265,
"grad_norm": 0.3427264988422394,
"learning_rate": 0.00012234226117765565,
"loss": 1.765831756591797,
"step": 2120
},
{
"epoch": 0.26625,
"grad_norm": 0.35128286480903625,
"learning_rate": 0.00012214133634024592,
"loss": 1.8477115631103516,
"step": 2130
},
{
"epoch": 0.2675,
"grad_norm": 0.36919906735420227,
"learning_rate": 0.0001219395867441272,
"loss": 1.7384143829345704,
"step": 2140
},
{
"epoch": 0.26875,
"grad_norm": 0.37480294704437256,
"learning_rate": 0.00012173701566212328,
"loss": 1.776589584350586,
"step": 2150
},
{
"epoch": 0.27,
"grad_norm": 0.3442743718624115,
"learning_rate": 0.00012153362638038429,
"loss": 1.7534845352172852,
"step": 2160
},
{
"epoch": 0.27125,
"grad_norm": 0.3617842495441437,
"learning_rate": 0.0001213294221983334,
"loss": 1.8287986755371093,
"step": 2170
},
{
"epoch": 0.2725,
"grad_norm": 0.3468424081802368,
"learning_rate": 0.00012112440642861319,
"loss": 1.7810518264770507,
"step": 2180
},
{
"epoch": 0.27375,
"grad_norm": 0.36655351519584656,
"learning_rate": 0.000120918582397032,
"loss": 1.8189208984375,
"step": 2190
},
{
"epoch": 0.275,
"grad_norm": 0.35723134875297546,
"learning_rate": 0.00012071195344251006,
"loss": 1.8201839447021484,
"step": 2200
},
{
"epoch": 0.27625,
"grad_norm": 0.36652442812919617,
"learning_rate": 0.00012050452291702508,
"loss": 1.8076786041259765,
"step": 2210
},
{
"epoch": 0.2775,
"grad_norm": 0.3568657338619232,
"learning_rate": 0.00012029629418555812,
"loss": 1.7748506546020508,
"step": 2220
},
{
"epoch": 0.27875,
"grad_norm": 0.34934675693511963,
"learning_rate": 0.00012008727062603888,
"loss": 1.8173185348510743,
"step": 2230
},
{
"epoch": 0.28,
"grad_norm": 0.34384509921073914,
"learning_rate": 0.00011987745562929093,
"loss": 1.7502407073974608,
"step": 2240
},
{
"epoch": 0.28125,
"grad_norm": 0.3680790066719055,
"learning_rate": 0.00011966685259897665,
"loss": 1.741659927368164,
"step": 2250
},
{
"epoch": 0.2825,
"grad_norm": 0.37108564376831055,
"learning_rate": 0.00011945546495154214,
"loss": 1.7894527435302734,
"step": 2260
},
{
"epoch": 0.28375,
"grad_norm": 0.37491941452026367,
"learning_rate": 0.00011924329611616168,
"loss": 1.7868507385253907,
"step": 2270
},
{
"epoch": 0.285,
"grad_norm": 0.3443116545677185,
"learning_rate": 0.00011903034953468213,
"loss": 1.7541233062744142,
"step": 2280
},
{
"epoch": 0.28625,
"grad_norm": 0.3643540143966675,
"learning_rate": 0.00011881662866156715,
"loss": 1.8128959655761718,
"step": 2290
},
{
"epoch": 0.2875,
"grad_norm": 0.35639819502830505,
"learning_rate": 0.00011860213696384107,
"loss": 1.7657649993896485,
"step": 2300
},
{
"epoch": 0.28875,
"grad_norm": 0.36442187428474426,
"learning_rate": 0.00011838687792103273,
"loss": 1.792444610595703,
"step": 2310
},
{
"epoch": 0.29,
"grad_norm": 0.36035555601119995,
"learning_rate": 0.00011817085502511903,
"loss": 1.7670486450195313,
"step": 2320
},
{
"epoch": 0.29125,
"grad_norm": 0.3552349805831909,
"learning_rate": 0.00011795407178046817,
"loss": 1.8542526245117188,
"step": 2330
},
{
"epoch": 0.2925,
"grad_norm": 0.3693036437034607,
"learning_rate": 0.00011773653170378296,
"loss": 1.6886547088623047,
"step": 2340
},
{
"epoch": 0.29375,
"grad_norm": 0.3605458736419678,
"learning_rate": 0.00011751823832404365,
"loss": 1.7754722595214845,
"step": 2350
},
{
"epoch": 0.295,
"grad_norm": 0.35839903354644775,
"learning_rate": 0.00011729919518245076,
"loss": 1.7882440567016602,
"step": 2360
},
{
"epoch": 0.29625,
"grad_norm": 0.36839786171913147,
"learning_rate": 0.00011707940583236761,
"loss": 1.7781326293945312,
"step": 2370
},
{
"epoch": 0.2975,
"grad_norm": 0.35868513584136963,
"learning_rate": 0.0001168588738392626,
"loss": 1.7871665954589844,
"step": 2380
},
{
"epoch": 0.29875,
"grad_norm": 0.3435186743736267,
"learning_rate": 0.00011663760278065153,
"loss": 1.8193252563476563,
"step": 2390
},
{
"epoch": 0.3,
"grad_norm": 0.3949030935764313,
"learning_rate": 0.00011641559624603941,
"loss": 1.7928247451782227,
"step": 2400
},
{
"epoch": 0.30125,
"grad_norm": 0.3681996762752533,
"learning_rate": 0.00011619285783686234,
"loss": 1.7616628646850585,
"step": 2410
},
{
"epoch": 0.3025,
"grad_norm": 0.3694431781768799,
"learning_rate": 0.00011596939116642899,
"loss": 1.8024406433105469,
"step": 2420
},
{
"epoch": 0.30375,
"grad_norm": 0.3637784719467163,
"learning_rate": 0.00011574519985986208,
"loss": 1.757676887512207,
"step": 2430
},
{
"epoch": 0.305,
"grad_norm": 0.3616812229156494,
"learning_rate": 0.00011552028755403952,
"loss": 1.79559326171875,
"step": 2440
},
{
"epoch": 0.30625,
"grad_norm": 0.36502957344055176,
"learning_rate": 0.00011529465789753538,
"loss": 1.7899351119995117,
"step": 2450
},
{
"epoch": 0.3075,
"grad_norm": 0.3788166344165802,
"learning_rate": 0.00011506831455056079,
"loss": 1.8282848358154298,
"step": 2460
},
{
"epoch": 0.30875,
"grad_norm": 0.36333489418029785,
"learning_rate": 0.00011484126118490451,
"loss": 1.766189956665039,
"step": 2470
},
{
"epoch": 0.31,
"grad_norm": 0.35034865140914917,
"learning_rate": 0.00011461350148387332,
"loss": 1.7669204711914062,
"step": 2480
},
{
"epoch": 0.31125,
"grad_norm": 0.35153037309646606,
"learning_rate": 0.00011438503914223241,
"loss": 1.7271625518798828,
"step": 2490
},
{
"epoch": 0.3125,
"grad_norm": 0.3732260763645172,
"learning_rate": 0.00011415587786614524,
"loss": 1.7690876007080079,
"step": 2500
},
{
"epoch": 0.31375,
"grad_norm": 0.3613711893558502,
"learning_rate": 0.0001139260213731136,
"loss": 1.7684833526611328,
"step": 2510
},
{
"epoch": 0.315,
"grad_norm": 0.35713133215904236,
"learning_rate": 0.00011369547339191726,
"loss": 1.7643346786499023,
"step": 2520
},
{
"epoch": 0.31625,
"grad_norm": 0.35974639654159546,
"learning_rate": 0.0001134642376625534,
"loss": 1.7887260437011718,
"step": 2530
},
{
"epoch": 0.3175,
"grad_norm": 0.36356088519096375,
"learning_rate": 0.00011323231793617599,
"loss": 1.788846206665039,
"step": 2540
},
{
"epoch": 0.31875,
"grad_norm": 0.3578101098537445,
"learning_rate": 0.00011299971797503495,
"loss": 1.781305694580078,
"step": 2550
},
{
"epoch": 0.32,
"grad_norm": 0.35546955466270447,
"learning_rate": 0.00011276644155241517,
"loss": 1.7678417205810546,
"step": 2560
},
{
"epoch": 0.32125,
"grad_norm": 0.3539295792579651,
"learning_rate": 0.00011253249245257516,
"loss": 1.7507053375244142,
"step": 2570
},
{
"epoch": 0.3225,
"grad_norm": 0.35056355595588684,
"learning_rate": 0.00011229787447068576,
"loss": 1.8345399856567384,
"step": 2580
},
{
"epoch": 0.32375,
"grad_norm": 0.3503001034259796,
"learning_rate": 0.00011206259141276858,
"loss": 1.8280166625976562,
"step": 2590
},
{
"epoch": 0.325,
"grad_norm": 0.3602514863014221,
"learning_rate": 0.0001118266470956342,
"loss": 1.7046276092529298,
"step": 2600
},
{
"epoch": 0.32625,
"grad_norm": 0.3672384023666382,
"learning_rate": 0.00011159004534682027,
"loss": 1.805099868774414,
"step": 2610
},
{
"epoch": 0.3275,
"grad_norm": 0.3589872419834137,
"learning_rate": 0.00011135279000452953,
"loss": 1.7550365447998046,
"step": 2620
},
{
"epoch": 0.32875,
"grad_norm": 0.3497745990753174,
"learning_rate": 0.00011111488491756732,
"loss": 1.758819580078125,
"step": 2630
},
{
"epoch": 0.33,
"grad_norm": 0.3647236227989197,
"learning_rate": 0.00011087633394527935,
"loss": 1.765294647216797,
"step": 2640
},
{
"epoch": 0.33125,
"grad_norm": 0.33403027057647705,
"learning_rate": 0.00011063714095748899,
"loss": 1.7979480743408203,
"step": 2650
},
{
"epoch": 0.3325,
"grad_norm": 0.3792349696159363,
"learning_rate": 0.00011039730983443455,
"loss": 1.829258346557617,
"step": 2660
},
{
"epoch": 0.33375,
"grad_norm": 0.3754643201828003,
"learning_rate": 0.00011015684446670626,
"loss": 1.783727264404297,
"step": 2670
},
{
"epoch": 0.335,
"grad_norm": 0.3466981053352356,
"learning_rate": 0.00010991574875518323,
"loss": 1.7687664031982422,
"step": 2680
},
{
"epoch": 0.33625,
"grad_norm": 0.3535688519477844,
"learning_rate": 0.00010967402661097012,
"loss": 1.8189085006713868,
"step": 2690
},
{
"epoch": 0.3375,
"grad_norm": 0.36101067066192627,
"learning_rate": 0.0001094316819553337,
"loss": 1.752197265625,
"step": 2700
},
{
"epoch": 0.33875,
"grad_norm": 0.36568474769592285,
"learning_rate": 0.0001091887187196393,
"loss": 1.7754268646240234,
"step": 2710
},
{
"epoch": 0.34,
"grad_norm": 0.3312813639640808,
"learning_rate": 0.00010894514084528695,
"loss": 1.75748291015625,
"step": 2720
},
{
"epoch": 0.34125,
"grad_norm": 0.3573434054851532,
"learning_rate": 0.00010870095228364743,
"loss": 1.7631900787353516,
"step": 2730
},
{
"epoch": 0.3425,
"grad_norm": 0.35645684599876404,
"learning_rate": 0.00010845615699599832,
"loss": 1.747064971923828,
"step": 2740
},
{
"epoch": 0.34375,
"grad_norm": 0.3608238101005554,
"learning_rate": 0.00010821075895345951,
"loss": 1.772369384765625,
"step": 2750
},
{
"epoch": 0.345,
"grad_norm": 0.37147653102874756,
"learning_rate": 0.00010796476213692903,
"loss": 1.8682558059692382,
"step": 2760
},
{
"epoch": 0.34625,
"grad_norm": 0.3562459349632263,
"learning_rate": 0.0001077181705370183,
"loss": 1.7756576538085938,
"step": 2770
},
{
"epoch": 0.3475,
"grad_norm": 0.3861102759838104,
"learning_rate": 0.00010747098815398739,
"loss": 1.797110366821289,
"step": 2780
},
{
"epoch": 0.34875,
"grad_norm": 0.3438943326473236,
"learning_rate": 0.0001072232189976802,
"loss": 1.7463438034057617,
"step": 2790
},
{
"epoch": 0.35,
"grad_norm": 0.3862653374671936,
"learning_rate": 0.00010697486708745942,
"loss": 1.781214141845703,
"step": 2800
}
],
"logging_steps": 10,
"max_steps": 8000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.750651595063296e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
Free AI Image Generator No sign-up. Instant results. Open Now