diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -5035,3 +5035,1154 @@ Time to load utils op: 0.00036025047302246094 seconds [2022-12-20 01:52:57,817] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt. [2022-12-20 01:52:57,818] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-4000/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt [2022-12-20 01:52:57,818] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step4000 is ready now! +[2022-12-20 01:54:15,312] [INFO] [timer.py:197:stop] 0/4001, RunningAvgSamplesPerSec=12.004123110771685, CurrSamplesPerSec=11.581480724537343, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:21,788] [INFO] [timer.py:197:stop] 0/4002, RunningAvgSamplesPerSec=12.004096423422798, CurrSamplesPerSec=11.898314407952874, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:28,430] [INFO] [timer.py:197:stop] 0/4003, RunningAvgSamplesPerSec=12.004084639772081, CurrSamplesPerSec=11.957134435183542, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:35,043] [INFO] [timer.py:197:stop] 0/4004, RunningAvgSamplesPerSec=12.004083561678474, CurrSamplesPerSec=11.999771658950918, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:41,503] [INFO] [timer.py:197:stop] 0/4005, RunningAvgSamplesPerSec=12.004081138553007, CurrSamplesPerSec=11.994391619940965, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:47,985] [INFO] [timer.py:197:stop] 0/4006, RunningAvgSamplesPerSec=12.004054381548189, CurrSamplesPerSec=11.897893566478789, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:54:53,983] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 65536.0 +[2022-12-20 01:54:53,984] [INFO] [timer.py:197:stop] 0/4007, RunningAvgSamplesPerSec=12.00424334166192, CurrSamplesPerSec=12.811747221342047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:00,197] [INFO] [stage_1_and_2.py:1765:step] [deepspeed] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 65536.0, reducing to 32768.0 +[2022-12-20 01:55:00,198] [INFO] [timer.py:197:stop] 0/4008, RunningAvgSamplesPerSec=12.004428118618659, CurrSamplesPerSec=12.793090279884327, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:06,832] [INFO] [timer.py:197:stop] 0/4009, RunningAvgSamplesPerSec=12.004428251356638, CurrSamplesPerSec=12.004960023262575, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:13,382] [INFO] [logging.py:68:log_dist] [Rank 0] step=4010, skipped=8, lr=[2.2200000000000003e-06], mom=[[0.9, 0.999]] +[2022-12-20 01:55:13,382] [INFO] [timer.py:197:stop] 0/4010, RunningAvgSamplesPerSec=12.004429985166102, CurrSamplesPerSec=12.011381383699298, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:19,808] [INFO] [timer.py:197:stop] 0/4011, RunningAvgSamplesPerSec=12.004428291400423, CurrSamplesPerSec=11.997643516373794, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:26,261] [INFO] [timer.py:197:stop] 0/4012, RunningAvgSamplesPerSec=12.004416233773906, CurrSamplesPerSec=11.956271126711004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:32,770] [INFO] [timer.py:197:stop] 0/4013, RunningAvgSamplesPerSec=12.004393593776078, CurrSamplesPerSec=11.914288813569915, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:39,252] [INFO] [timer.py:197:stop] 0/4014, RunningAvgSamplesPerSec=12.00439014878608, CurrSamplesPerSec=11.990588184845077, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:45,816] [INFO] [timer.py:197:stop] 0/4015, RunningAvgSamplesPerSec=12.004394572888401, CurrSamplesPerSec=12.022170360943068, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:52,433] [INFO] [timer.py:197:stop] 0/4016, RunningAvgSamplesPerSec=12.004370760722816, CurrSamplesPerSec=11.909567390197111, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:55:58,930] [INFO] [timer.py:197:stop] 0/4017, RunningAvgSamplesPerSec=12.004371065655986, CurrSamplesPerSec=12.005595192242154, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:05,370] [INFO] [timer.py:197:stop] 0/4018, RunningAvgSamplesPerSec=12.004359632179282, CurrSamplesPerSec=11.958629142974015, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:11,818] [INFO] [timer.py:197:stop] 0/4019, RunningAvgSamplesPerSec=12.004361501447095, CurrSamplesPerSec=12.011873179607035, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:18,326] [INFO] [logging.py:68:log_dist] [Rank 0] step=4020, skipped=8, lr=[2.197777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 01:56:18,327] [INFO] [timer.py:197:stop] 0/4020, RunningAvgSamplesPerSec=12.004334827439283, CurrSamplesPerSec=11.898133516302812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:24,818] [INFO] [timer.py:197:stop] 0/4021, RunningAvgSamplesPerSec=12.004317225570166, CurrSamplesPerSec=11.934007254864678, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:31,222] [INFO] [timer.py:197:stop] 0/4022, RunningAvgSamplesPerSec=12.004322320941542, CurrSamplesPerSec=12.02483562106523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:37,811] [INFO] [timer.py:197:stop] 0/4023, RunningAvgSamplesPerSec=12.004305803025838, CurrSamplesPerSec=11.938269155600432, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:44,311] [INFO] [timer.py:197:stop] 0/4024, RunningAvgSamplesPerSec=12.004290280396663, CurrSamplesPerSec=11.942196725173922, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:56:50,781] [INFO] [timer.py:197:stop] 0/4025, RunningAvgSamplesPerSec=12.004296665397634, CurrSamplesPerSec=12.030032208352715, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.1866666666666668e-06, 'epoch': 105.92} +[2022-12-20 01:56:57,253] [INFO] [timer.py:197:stop] 0/4026, RunningAvgSamplesPerSec=12.004275797074035, CurrSamplesPerSec=11.920905734166407, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:03,716] [INFO] [timer.py:197:stop] 0/4027, RunningAvgSamplesPerSec=12.004268563557458, CurrSamplesPerSec=11.975231319177214, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:08,383] [INFO] [timer.py:197:stop] 0/4028, RunningAvgSamplesPerSec=12.005091868871453, CurrSamplesPerSec=16.582812578860658, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:14,884] [INFO] [timer.py:197:stop] 0/4029, RunningAvgSamplesPerSec=12.005093410291462, CurrSamplesPerSec=12.011302377619474, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:21,315] [INFO] [logging.py:68:log_dist] [Rank 0] step=4030, skipped=8, lr=[2.1755555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 01:57:21,316] [INFO] [timer.py:197:stop] 0/4030, RunningAvgSamplesPerSec=12.005096673203859, CurrSamplesPerSec=12.018250822418384, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:27,801] [INFO] [timer.py:197:stop] 0/4031, RunningAvgSamplesPerSec=12.005082804162592, CurrSamplesPerSec=11.949477125850146, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:34,245] [INFO] [timer.py:197:stop] 0/4032, RunningAvgSamplesPerSec=12.005072064155321, CurrSamplesPerSec=11.961956022356812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:40,712] [INFO] [timer.py:197:stop] 0/4033, RunningAvgSamplesPerSec=12.005074462445888, CurrSamplesPerSec=12.014747362878547, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:47,206] [INFO] [timer.py:197:stop] 0/4034, RunningAvgSamplesPerSec=12.005075700268412, CurrSamplesPerSec=12.010067438090777, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:57:53,673] [INFO] [timer.py:197:stop] 0/4035, RunningAvgSamplesPerSec=12.005078468156453, CurrSamplesPerSec=12.016248979604262, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:00,079] [INFO] [timer.py:197:stop] 0/4036, RunningAvgSamplesPerSec=12.005081548772411, CurrSamplesPerSec=12.017518547243473, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:06,576] [INFO] [timer.py:197:stop] 0/4037, RunningAvgSamplesPerSec=12.005062813139185, CurrSamplesPerSec=11.92995622986558, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:13,071] [INFO] [timer.py:197:stop] 0/4038, RunningAvgSamplesPerSec=12.005039401542271, CurrSamplesPerSec=11.911311322504524, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:19,496] [INFO] [timer.py:197:stop] 0/4039, RunningAvgSamplesPerSec=12.005014728498903, CurrSamplesPerSec=11.906253741003905, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:26,049] [INFO] [logging.py:68:log_dist] [Rank 0] step=4040, skipped=8, lr=[2.153333333333333e-06], mom=[[0.9, 0.999]] +[2022-12-20 01:58:26,050] [INFO] [timer.py:197:stop] 0/4040, RunningAvgSamplesPerSec=12.004992321962934, CurrSamplesPerSec=11.915213767054338, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:32,579] [INFO] [timer.py:197:stop] 0/4041, RunningAvgSamplesPerSec=12.004972670056405, CurrSamplesPerSec=11.926139497853223, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:38,990] [INFO] [timer.py:197:stop] 0/4042, RunningAvgSamplesPerSec=12.004954695076465, CurrSamplesPerSec=11.932790279349998, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:45,366] [INFO] [timer.py:197:stop] 0/4043, RunningAvgSamplesPerSec=12.004951581639782, CurrSamplesPerSec=11.992386465886547, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:51,924] [INFO] [timer.py:197:stop] 0/4044, RunningAvgSamplesPerSec=12.00488212558153, CurrSamplesPerSec=11.7306238879994, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:58:58,379] [INFO] [timer.py:197:stop] 0/4045, RunningAvgSamplesPerSec=12.00486888783137, CurrSamplesPerSec=11.951599388287162, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:04,887] [INFO] [timer.py:197:stop] 0/4046, RunningAvgSamplesPerSec=12.004850851061196, CurrSamplesPerSec=11.932368586810073, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:11,384] [INFO] [timer.py:197:stop] 0/4047, RunningAvgSamplesPerSec=12.004832673018164, CurrSamplesPerSec=11.931768190498323, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:17,873] [INFO] [timer.py:197:stop] 0/4048, RunningAvgSamplesPerSec=12.004806619587603, CurrSamplesPerSec=11.900337815896387, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:24,377] [INFO] [timer.py:197:stop] 0/4049, RunningAvgSamplesPerSec=12.004778556500323, CurrSamplesPerSec=11.89229941528095, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:30,905] [INFO] [logging.py:68:log_dist] [Rank 0] step=4050, skipped=8, lr=[2.1311111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 01:59:30,905] [INFO] [timer.py:197:stop] 0/4050, RunningAvgSamplesPerSec=12.004748330898336, CurrSamplesPerSec=11.883659465857564, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.1311111111111112e-06, 'epoch': 106.58} +[2022-12-20 01:59:37,322] [INFO] [timer.py:197:stop] 0/4051, RunningAvgSamplesPerSec=12.004750673713325, CurrSamplesPerSec=12.01424188868297, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:44,009] [INFO] [timer.py:197:stop] 0/4052, RunningAvgSamplesPerSec=12.004748111062568, CurrSamplesPerSec=11.994380901140705, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:50,423] [INFO] [timer.py:197:stop] 0/4053, RunningAvgSamplesPerSec=12.004723249165462, CurrSamplesPerSec=11.904870298180716, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 01:59:56,857] [INFO] [timer.py:197:stop] 0/4054, RunningAvgSamplesPerSec=12.004710278453896, CurrSamplesPerSec=11.952394965393616, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:03,249] [INFO] [timer.py:197:stop] 0/4055, RunningAvgSamplesPerSec=12.004714787175473, CurrSamplesPerSec=12.023011979411654, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:09,897] [INFO] [timer.py:197:stop] 0/4056, RunningAvgSamplesPerSec=12.004691163613824, CurrSamplesPerSec=11.90970265879606, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:16,364] [INFO] [timer.py:197:stop] 0/4057, RunningAvgSamplesPerSec=12.004673678084755, CurrSamplesPerSec=11.934203564485603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:22,862] [INFO] [timer.py:197:stop] 0/4058, RunningAvgSamplesPerSec=12.004649512609479, CurrSamplesPerSec=11.907452104563784, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:29,345] [INFO] [timer.py:197:stop] 0/4059, RunningAvgSamplesPerSec=12.004651425205687, CurrSamplesPerSec=12.01241393284605, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:35,785] [INFO] [logging.py:68:log_dist] [Rank 0] step=4060, skipped=8, lr=[2.108888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:00:35,786] [INFO] [timer.py:197:stop] 0/4060, RunningAvgSamplesPerSec=12.004635725352784, CurrSamplesPerSec=11.941277670722814, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:42,194] [INFO] [timer.py:197:stop] 0/4061, RunningAvgSamplesPerSec=12.004620856216306, CurrSamplesPerSec=11.944583739962134, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:48,633] [INFO] [timer.py:197:stop] 0/4062, RunningAvgSamplesPerSec=12.004600304720528, CurrSamplesPerSec=11.921757589223835, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:00:55,080] [INFO] [timer.py:197:stop] 0/4063, RunningAvgSamplesPerSec=12.004605349110683, CurrSamplesPerSec=12.025120581379895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:01,498] [INFO] [timer.py:197:stop] 0/4064, RunningAvgSamplesPerSec=12.004613900266525, CurrSamplesPerSec=12.039440914492733, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:07,950] [INFO] [timer.py:197:stop] 0/4065, RunningAvgSamplesPerSec=12.004622248761946, CurrSamplesPerSec=12.038629928341493, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:12,616] [INFO] [timer.py:197:stop] 0/4066, RunningAvgSamplesPerSec=12.005431747897077, CurrSamplesPerSec=16.535892222399564, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:19,054] [INFO] [timer.py:197:stop] 0/4067, RunningAvgSamplesPerSec=12.005428655433285, CurrSamplesPerSec=11.992874028505584, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:25,559] [INFO] [timer.py:197:stop] 0/4068, RunningAvgSamplesPerSec=12.005409456307794, CurrSamplesPerSec=11.927869206771717, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:32,130] [INFO] [timer.py:197:stop] 0/4069, RunningAvgSamplesPerSec=12.005368401765665, CurrSamplesPerSec=11.840730393641255, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:38,560] [INFO] [logging.py:68:log_dist] [Rank 0] step=4070, skipped=8, lr=[2.086666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:01:38,561] [INFO] [timer.py:197:stop] 0/4070, RunningAvgSamplesPerSec=12.005372236558813, CurrSamplesPerSec=12.020988632441169, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:44,989] [INFO] [timer.py:197:stop] 0/4071, RunningAvgSamplesPerSec=12.005375468021272, CurrSamplesPerSec=12.018535470726329, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:51,540] [INFO] [timer.py:197:stop] 0/4072, RunningAvgSamplesPerSec=12.005326109375579, CurrSamplesPerSec=11.807791206706398, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:01:58,017] [INFO] [timer.py:197:stop] 0/4073, RunningAvgSamplesPerSec=12.005318813459724, CurrSamplesPerSec=11.975697719778562, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:04,428] [INFO] [timer.py:197:stop] 0/4074, RunningAvgSamplesPerSec=12.00531613516777, CurrSamplesPerSec=11.994422704570054, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:10,920] [INFO] [timer.py:197:stop] 0/4075, RunningAvgSamplesPerSec=12.005290832582757, CurrSamplesPerSec=11.903135640708296, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.0755555555555557e-06, 'epoch': 107.24} +[2022-12-20 02:02:17,702] [INFO] [timer.py:197:stop] 0/4076, RunningAvgSamplesPerSec=12.005272888903322, CurrSamplesPerSec=11.932630616152727, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:24,153] [INFO] [timer.py:197:stop] 0/4077, RunningAvgSamplesPerSec=12.005270655512, CurrSamplesPerSec=11.996178711767175, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:30,563] [INFO] [timer.py:197:stop] 0/4078, RunningAvgSamplesPerSec=12.005254015499967, CurrSamplesPerSec=11.93782690160385, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:36,981] [INFO] [timer.py:197:stop] 0/4079, RunningAvgSamplesPerSec=12.00525628106804, CurrSamplesPerSec=12.014497846896077, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:43,420] [INFO] [logging.py:68:log_dist] [Rank 0] step=4080, skipped=8, lr=[2.064444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:02:43,420] [INFO] [timer.py:197:stop] 0/4080, RunningAvgSamplesPerSec=12.005234906827573, CurrSamplesPerSec=11.91872026901847, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:49,931] [INFO] [timer.py:197:stop] 0/4081, RunningAvgSamplesPerSec=12.005217656763193, CurrSamplesPerSec=11.935281790841895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:02:56,359] [INFO] [timer.py:197:stop] 0/4082, RunningAvgSamplesPerSec=12.005220382119191, CurrSamplesPerSec=12.016347415297842, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:02,832] [INFO] [timer.py:197:stop] 0/4083, RunningAvgSamplesPerSec=12.005218269621182, CurrSamplesPerSec=11.99660546271307, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:09,219] [INFO] [timer.py:197:stop] 0/4084, RunningAvgSamplesPerSec=12.005203035225097, CurrSamplesPerSec=11.9433518531685, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:15,710] [INFO] [timer.py:197:stop] 0/4085, RunningAvgSamplesPerSec=12.005187736926668, CurrSamplesPerSec=11.943063315902354, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:22,169] [INFO] [timer.py:197:stop] 0/4086, RunningAvgSamplesPerSec=12.005161207462509, CurrSamplesPerSec=11.897810245507076, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:28,793] [INFO] [timer.py:197:stop] 0/4087, RunningAvgSamplesPerSec=12.005160584244793, CurrSamplesPerSec=12.002615902724761, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:35,401] [INFO] [timer.py:197:stop] 0/4088, RunningAvgSamplesPerSec=12.005135814137892, CurrSamplesPerSec=11.904795854839925, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:41,904] [INFO] [timer.py:197:stop] 0/4089, RunningAvgSamplesPerSec=12.005110873391025, CurrSamplesPerSec=11.904060974799629, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:48,391] [INFO] [logging.py:68:log_dist] [Rank 0] step=4090, skipped=8, lr=[2.0422222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:03:48,392] [INFO] [timer.py:197:stop] 0/4090, RunningAvgSamplesPerSec=12.005087359031485, CurrSamplesPerSec=11.909747572877562, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:03:54,898] [INFO] [timer.py:197:stop] 0/4091, RunningAvgSamplesPerSec=12.005065513430397, CurrSamplesPerSec=11.916420282974936, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:01,334] [INFO] [timer.py:197:stop] 0/4092, RunningAvgSamplesPerSec=12.005047739550236, CurrSamplesPerSec=11.932807784212917, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:07,818] [INFO] [timer.py:197:stop] 0/4093, RunningAvgSamplesPerSec=12.005022085351971, CurrSamplesPerSec=11.9010057548659, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:14,257] [INFO] [timer.py:197:stop] 0/4094, RunningAvgSamplesPerSec=12.005016135916696, CurrSamplesPerSec=11.980726253891437, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:20,788] [INFO] [timer.py:197:stop] 0/4095, RunningAvgSamplesPerSec=12.004973574878123, CurrSamplesPerSec=11.833304863388125, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:27,216] [INFO] [timer.py:197:stop] 0/4096, RunningAvgSamplesPerSec=12.004972594445022, CurrSamplesPerSec=12.000961023040679, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:33,697] [INFO] [timer.py:197:stop] 0/4097, RunningAvgSamplesPerSec=12.00494810114184, CurrSamplesPerSec=11.905503368182638, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:40,173] [INFO] [timer.py:197:stop] 0/4098, RunningAvgSamplesPerSec=12.004939086080991, CurrSamplesPerSec=11.968135614772168, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:46,637] [INFO] [timer.py:197:stop] 0/4099, RunningAvgSamplesPerSec=12.004922129654753, CurrSamplesPerSec=11.935868211271112, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:04:53,076] [INFO] [logging.py:68:log_dist] [Rank 0] step=4100, skipped=8, lr=[2.02e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:04:53,077] [INFO] [timer.py:197:stop] 0/4100, RunningAvgSamplesPerSec=12.004907089587013, CurrSamplesPerSec=11.943602674052569, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.02e-06, 'epoch': 107.89} +[2022-12-20 02:04:59,888] [INFO] [timer.py:197:stop] 0/4101, RunningAvgSamplesPerSec=12.004886612530674, CurrSamplesPerSec=11.92155427652141, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:06,446] [INFO] [timer.py:197:stop] 0/4102, RunningAvgSamplesPerSec=12.00487247348239, CurrSamplesPerSec=11.947195032047544, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:12,900] [INFO] [timer.py:197:stop] 0/4103, RunningAvgSamplesPerSec=12.00487115741532, CurrSamplesPerSec=11.9994777072313, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:17,600] [INFO] [timer.py:197:stop] 0/4104, RunningAvgSamplesPerSec=12.00565576975593, CurrSamplesPerSec=16.401896527357152, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:24,066] [INFO] [timer.py:197:stop] 0/4105, RunningAvgSamplesPerSec=12.00564854107965, CurrSamplesPerSec=11.976069584087316, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:30,490] [INFO] [timer.py:197:stop] 0/4106, RunningAvgSamplesPerSec=12.005649793778522, CurrSamplesPerSec=12.010791819171793, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:36,882] [INFO] [timer.py:197:stop] 0/4107, RunningAvgSamplesPerSec=12.005643656929513, CurrSamplesPerSec=11.98051076561553, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:43,365] [INFO] [timer.py:197:stop] 0/4108, RunningAvgSamplesPerSec=12.005614688531502, CurrSamplesPerSec=11.88786599627486, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:49,858] [INFO] [timer.py:197:stop] 0/4109, RunningAvgSamplesPerSec=12.005583483985756, CurrSamplesPerSec=11.878810889837741, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:05:56,358] [INFO] [logging.py:68:log_dist] [Rank 0] step=4110, skipped=8, lr=[1.9977777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:05:56,358] [INFO] [timer.py:197:stop] 0/4110, RunningAvgSamplesPerSec=12.00553984601997, CurrSamplesPerSec=11.828955449947257, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:03,004] [INFO] [timer.py:197:stop] 0/4111, RunningAvgSamplesPerSec=12.005535575361783, CurrSamplesPerSec=11.988017317449492, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:09,487] [INFO] [timer.py:197:stop] 0/4112, RunningAvgSamplesPerSec=12.005500372923317, CurrSamplesPerSec=11.862575979618155, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:15,914] [INFO] [timer.py:197:stop] 0/4113, RunningAvgSamplesPerSec=12.005484831330945, CurrSamplesPerSec=11.94194702586478, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:22,413] [INFO] [timer.py:197:stop] 0/4114, RunningAvgSamplesPerSec=12.00547590647529, CurrSamplesPerSec=11.968897639357598, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:28,888] [INFO] [timer.py:197:stop] 0/4115, RunningAvgSamplesPerSec=12.0054556588735, CurrSamplesPerSec=11.922771079408802, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:35,645] [INFO] [timer.py:197:stop] 0/4116, RunningAvgSamplesPerSec=12.00542999748324, CurrSamplesPerSec=11.900804732170613, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:42,113] [INFO] [timer.py:197:stop] 0/4117, RunningAvgSamplesPerSec=12.005399923317514, CurrSamplesPerSec=11.88293718740582, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:48,547] [INFO] [timer.py:197:stop] 0/4118, RunningAvgSamplesPerSec=12.005398157006958, CurrSamplesPerSec=11.998134187931042, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:06:55,081] [INFO] [timer.py:197:stop] 0/4119, RunningAvgSamplesPerSec=12.005393710324475, CurrSamplesPerSec=11.98711903222247, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:01,496] [INFO] [logging.py:68:log_dist] [Rank 0] step=4120, skipped=8, lr=[1.975555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:07:01,497] [INFO] [timer.py:197:stop] 0/4120, RunningAvgSamplesPerSec=12.005374409281368, CurrSamplesPerSec=11.926434636331555, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:08,022] [INFO] [timer.py:197:stop] 0/4121, RunningAvgSamplesPerSec=12.00534861231883, CurrSamplesPerSec=11.900048715485385, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:14,457] [INFO] [timer.py:197:stop] 0/4122, RunningAvgSamplesPerSec=12.005356716330548, CurrSamplesPerSec=12.038830234999756, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:20,868] [INFO] [timer.py:197:stop] 0/4123, RunningAvgSamplesPerSec=12.005348712703578, CurrSamplesPerSec=11.972464115227329, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:27,508] [INFO] [timer.py:197:stop] 0/4124, RunningAvgSamplesPerSec=12.005319697080495, CurrSamplesPerSec=11.886925807570954, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:34,023] [INFO] [timer.py:197:stop] 0/4125, RunningAvgSamplesPerSec=12.005305391376513, CurrSamplesPerSec=11.946625575228143, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.9644444444444446e-06, 'epoch': 108.55} +[2022-12-20 02:07:40,612] [INFO] [timer.py:197:stop] 0/4126, RunningAvgSamplesPerSec=12.005261351873127, CurrSamplesPerSec=11.826392457485241, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:47,097] [INFO] [timer.py:197:stop] 0/4127, RunningAvgSamplesPerSec=12.005239718675373, CurrSamplesPerSec=11.91668267038006, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:07:53,622] [INFO] [timer.py:197:stop] 0/4128, RunningAvgSamplesPerSec=12.005201240055372, CurrSamplesPerSec=11.84854858560964, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:00,156] [INFO] [timer.py:197:stop] 0/4129, RunningAvgSamplesPerSec=12.005174313682852, CurrSamplesPerSec=11.895095042505389, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:06,525] [INFO] [logging.py:68:log_dist] [Rank 0] step=4130, skipped=8, lr=[1.9533333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:08:06,525] [INFO] [timer.py:197:stop] 0/4130, RunningAvgSamplesPerSec=12.005176487947674, CurrSamplesPerSec=12.014156392466042, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:13,192] [INFO] [timer.py:197:stop] 0/4131, RunningAvgSamplesPerSec=12.005147666261847, CurrSamplesPerSec=11.887339556504921, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:19,642] [INFO] [timer.py:197:stop] 0/4132, RunningAvgSamplesPerSec=12.005134594529334, CurrSamplesPerSec=11.951403038060324, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:26,141] [INFO] [timer.py:197:stop] 0/4133, RunningAvgSamplesPerSec=12.005113248217725, CurrSamplesPerSec=11.917595826302406, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:32,587] [INFO] [timer.py:197:stop] 0/4134, RunningAvgSamplesPerSec=12.005114515700834, CurrSamplesPerSec=12.010352773620122, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:39,070] [INFO] [timer.py:197:stop] 0/4135, RunningAvgSamplesPerSec=12.005089101086885, CurrSamplesPerSec=11.900986760304953, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:45,656] [INFO] [timer.py:197:stop] 0/4136, RunningAvgSamplesPerSec=12.00507220724615, CurrSamplesPerSec=11.935653802911114, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:52,166] [INFO] [timer.py:197:stop] 0/4137, RunningAvgSamplesPerSec=12.005070634480273, CurrSamplesPerSec=11.99857234060556, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:08:58,804] [INFO] [timer.py:197:stop] 0/4138, RunningAvgSamplesPerSec=12.005047725476063, CurrSamplesPerSec=11.911060798998822, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:05,281] [INFO] [timer.py:197:stop] 0/4139, RunningAvgSamplesPerSec=12.005026298407467, CurrSamplesPerSec=11.917053523898726, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:11,688] [INFO] [logging.py:68:log_dist] [Rank 0] step=4140, skipped=8, lr=[1.9311111111111114e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:09:11,689] [INFO] [timer.py:197:stop] 0/4140, RunningAvgSamplesPerSec=12.005030248225935, CurrSamplesPerSec=12.021392924338416, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:18,215] [INFO] [timer.py:197:stop] 0/4141, RunningAvgSamplesPerSec=12.005010029302335, CurrSamplesPerSec=11.921923315384712, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:22,815] [INFO] [timer.py:197:stop] 0/4142, RunningAvgSamplesPerSec=12.005820278728365, CurrSamplesPerSec=16.65975431969967, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:29,295] [INFO] [timer.py:197:stop] 0/4143, RunningAvgSamplesPerSec=12.005798705202128, CurrSamplesPerSec=11.917143992381849, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:35,767] [INFO] [timer.py:197:stop] 0/4144, RunningAvgSamplesPerSec=12.005803422986082, CurrSamplesPerSec=12.025371616211052, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:42,234] [INFO] [timer.py:197:stop] 0/4145, RunningAvgSamplesPerSec=12.005804045124238, CurrSamplesPerSec=12.008381494718979, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:48,694] [INFO] [timer.py:197:stop] 0/4146, RunningAvgSamplesPerSec=12.00578364052215, CurrSamplesPerSec=11.921838598560687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:09:55,128] [INFO] [timer.py:197:stop] 0/4147, RunningAvgSamplesPerSec=12.005763876840483, CurrSamplesPerSec=11.92441823610419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:01,601] [INFO] [timer.py:197:stop] 0/4148, RunningAvgSamplesPerSec=12.005754035979608, CurrSamplesPerSec=11.96510181964091, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:08,027] [INFO] [timer.py:197:stop] 0/4149, RunningAvgSamplesPerSec=12.00575432021219, CurrSamplesPerSec=12.006932864212324, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:14,582] [INFO] [logging.py:68:log_dist] [Rank 0] step=4150, skipped=8, lr=[1.908888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:10:14,583] [INFO] [timer.py:197:stop] 0/4150, RunningAvgSamplesPerSec=12.005733751068913, CurrSamplesPerSec=11.921035436992348, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.908888888888889e-06, 'epoch': 109.21} +[2022-12-20 02:10:21,081] [INFO] [timer.py:197:stop] 0/4151, RunningAvgSamplesPerSec=12.005717376623844, CurrSamplesPerSec=11.938178366092723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:27,532] [INFO] [timer.py:197:stop] 0/4152, RunningAvgSamplesPerSec=12.005708410606648, CurrSamplesPerSec=11.968623342019546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:33,973] [INFO] [timer.py:197:stop] 0/4153, RunningAvgSamplesPerSec=12.005688630996236, CurrSamplesPerSec=11.924160804765659, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:40,493] [INFO] [timer.py:197:stop] 0/4154, RunningAvgSamplesPerSec=12.005663251341081, CurrSamplesPerSec=11.901228945498861, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:47,027] [INFO] [timer.py:197:stop] 0/4155, RunningAvgSamplesPerSec=12.00562971463284, CurrSamplesPerSec=11.867982158642292, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:10:53,572] [INFO] [timer.py:197:stop] 0/4156, RunningAvgSamplesPerSec=12.005599804853771, CurrSamplesPerSec=11.882656821831276, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:00,025] [INFO] [timer.py:197:stop] 0/4157, RunningAvgSamplesPerSec=12.005605204574294, CurrSamplesPerSec=12.028077639678774, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:06,513] [INFO] [timer.py:197:stop] 0/4158, RunningAvgSamplesPerSec=12.0055657261272, CurrSamplesPerSec=11.843744289038138, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:12,937] [INFO] [timer.py:197:stop] 0/4159, RunningAvgSamplesPerSec=12.005563577062011, CurrSamplesPerSec=11.996638703382914, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:19,433] [INFO] [logging.py:68:log_dist] [Rank 0] step=4160, skipped=8, lr=[1.8866666666666669e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:11:19,434] [INFO] [timer.py:197:stop] 0/4160, RunningAvgSamplesPerSec=12.005532250888773, CurrSamplesPerSec=11.87670704007222, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:25,934] [INFO] [timer.py:197:stop] 0/4161, RunningAvgSamplesPerSec=12.005520061605454, CurrSamplesPerSec=11.955050138887843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:32,369] [INFO] [timer.py:197:stop] 0/4162, RunningAvgSamplesPerSec=12.005516229736392, CurrSamplesPerSec=11.989600618588822, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:38,864] [INFO] [timer.py:197:stop] 0/4163, RunningAvgSamplesPerSec=12.00549486219098, CurrSamplesPerSec=11.917259328463233, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:45,436] [INFO] [timer.py:197:stop] 0/4164, RunningAvgSamplesPerSec=12.005483637157138, CurrSamplesPerSec=11.958957325571195, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:51,910] [INFO] [timer.py:197:stop] 0/4165, RunningAvgSamplesPerSec=12.0054628580285, CurrSamplesPerSec=11.919598804840618, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:11:58,432] [INFO] [timer.py:197:stop] 0/4166, RunningAvgSamplesPerSec=12.005435644395481, CurrSamplesPerSec=11.893204622496011, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:04,880] [INFO] [timer.py:197:stop] 0/4167, RunningAvgSamplesPerSec=12.005420328517161, CurrSamplesPerSec=11.941982089392882, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:11,420] [INFO] [timer.py:197:stop] 0/4168, RunningAvgSamplesPerSec=12.005397609669009, CurrSamplesPerSec=11.911513757423355, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:17,899] [INFO] [timer.py:197:stop] 0/4169, RunningAvgSamplesPerSec=12.005400096245003, CurrSamplesPerSec=12.015768120218443, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:24,439] [INFO] [logging.py:68:log_dist] [Rank 0] step=4170, skipped=8, lr=[1.8644444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:12:24,440] [INFO] [timer.py:197:stop] 0/4170, RunningAvgSamplesPerSec=12.005393973702818, CurrSamplesPerSec=11.979935455242353, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:30,942] [INFO] [timer.py:197:stop] 0/4171, RunningAvgSamplesPerSec=12.005379670564212, CurrSamplesPerSec=11.946058830840917, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:37,472] [INFO] [timer.py:197:stop] 0/4172, RunningAvgSamplesPerSec=12.005349377424672, CurrSamplesPerSec=11.880372309306154, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:44,032] [INFO] [timer.py:197:stop] 0/4173, RunningAvgSamplesPerSec=12.005330678808065, CurrSamplesPerSec=11.927860726582713, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:50,474] [INFO] [timer.py:197:stop] 0/4174, RunningAvgSamplesPerSec=12.005314865554412, CurrSamplesPerSec=11.939718258088202, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:12:56,935] [INFO] [timer.py:197:stop] 0/4175, RunningAvgSamplesPerSec=12.005314495655735, CurrSamplesPerSec=12.003771476769264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.8533333333333333e-06, 'epoch': 109.87} +[2022-12-20 02:13:03,448] [INFO] [timer.py:197:stop] 0/4176, RunningAvgSamplesPerSec=12.005291787834931, CurrSamplesPerSec=11.911274324699097, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:09,988] [INFO] [timer.py:197:stop] 0/4177, RunningAvgSamplesPerSec=12.005237246012953, CurrSamplesPerSec=11.781817447646636, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:16,496] [INFO] [timer.py:197:stop] 0/4178, RunningAvgSamplesPerSec=12.00520156006416, CurrSamplesPerSec=11.85803949504323, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:23,030] [INFO] [timer.py:197:stop] 0/4179, RunningAvgSamplesPerSec=12.005189845570877, CurrSamplesPerSec=11.956468702041528, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:27,769] [INFO] [logging.py:68:log_dist] [Rank 0] step=4180, skipped=8, lr=[1.8422222222222225e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:13:27,770] [INFO] [timer.py:197:stop] 0/4180, RunningAvgSamplesPerSec=12.005970691448994, CurrSamplesPerSec=16.48451590530319, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:34,256] [INFO] [timer.py:197:stop] 0/4181, RunningAvgSamplesPerSec=12.005944042617243, CurrSamplesPerSec=11.895628495236462, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:40,826] [INFO] [timer.py:197:stop] 0/4182, RunningAvgSamplesPerSec=12.005917739499289, CurrSamplesPerSec=11.896994499918009, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:47,342] [INFO] [timer.py:197:stop] 0/4183, RunningAvgSamplesPerSec=12.005898990661356, CurrSamplesPerSec=11.928037223002484, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:13:53,779] [INFO] [timer.py:197:stop] 0/4184, RunningAvgSamplesPerSec=12.005902761674303, CurrSamplesPerSec=12.021690104298571, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:00,256] [INFO] [timer.py:197:stop] 0/4185, RunningAvgSamplesPerSec=12.005904257589863, CurrSamplesPerSec=12.012163438712273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:06,753] [INFO] [timer.py:197:stop] 0/4186, RunningAvgSamplesPerSec=12.005883310594642, CurrSamplesPerSec=11.918897024252507, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:13,126] [INFO] [timer.py:197:stop] 0/4187, RunningAvgSamplesPerSec=12.005884799532101, CurrSamplesPerSec=12.012117748848786, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:19,608] [INFO] [timer.py:197:stop] 0/4188, RunningAvgSamplesPerSec=12.005887673793456, CurrSamplesPerSec=12.017928524259892, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:26,107] [INFO] [timer.py:197:stop] 0/4189, RunningAvgSamplesPerSec=12.005859172256192, CurrSamplesPerSec=11.887725958744234, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:32,644] [INFO] [logging.py:68:log_dist] [Rank 0] step=4190, skipped=8, lr=[1.8200000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:14:32,645] [INFO] [timer.py:197:stop] 0/4190, RunningAvgSamplesPerSec=12.005836609783046, CurrSamplesPerSec=11.912105245080493, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:39,091] [INFO] [timer.py:197:stop] 0/4191, RunningAvgSamplesPerSec=12.00581682168108, CurrSamplesPerSec=11.92351250841787, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:45,620] [INFO] [timer.py:197:stop] 0/4192, RunningAvgSamplesPerSec=12.0057888748922, CurrSamplesPerSec=11.88985056688803, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:52,101] [INFO] [timer.py:197:stop] 0/4193, RunningAvgSamplesPerSec=12.005789980110434, CurrSamplesPerSec=12.010422631842056, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:14:58,597] [INFO] [timer.py:197:stop] 0/4194, RunningAvgSamplesPerSec=12.005771584691177, CurrSamplesPerSec=11.929168408144093, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:04,987] [INFO] [timer.py:197:stop] 0/4195, RunningAvgSamplesPerSec=12.005765955982756, CurrSamplesPerSec=11.98221670386957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:11,449] [INFO] [timer.py:197:stop] 0/4196, RunningAvgSamplesPerSec=12.005715287844234, CurrSamplesPerSec=11.796958790632166, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:17,850] [INFO] [timer.py:197:stop] 0/4197, RunningAvgSamplesPerSec=12.005710365714297, CurrSamplesPerSec=11.985102395913602, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:24,411] [INFO] [timer.py:197:stop] 0/4198, RunningAvgSamplesPerSec=12.005685293503413, CurrSamplesPerSec=11.901421012224922, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:30,889] [INFO] [timer.py:197:stop] 0/4199, RunningAvgSamplesPerSec=12.005677042770113, CurrSamplesPerSec=11.971156534351277, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:37,368] [INFO] [logging.py:68:log_dist] [Rank 0] step=4200, skipped=8, lr=[1.797777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:15:37,369] [INFO] [timer.py:197:stop] 0/4200, RunningAvgSamplesPerSec=12.00564691567691, CurrSamplesPerSec=11.880521638259786, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.797777777777778e-06, 'epoch': 110.53} +[2022-12-20 02:15:43,848] [INFO] [timer.py:197:stop] 0/4201, RunningAvgSamplesPerSec=12.005644735751423, CurrSamplesPerSec=11.996500380516807, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:50,267] [INFO] [timer.py:197:stop] 0/4202, RunningAvgSamplesPerSec=12.005650893730586, CurrSamplesPerSec=12.03156407243515, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:15:56,741] [INFO] [timer.py:197:stop] 0/4203, RunningAvgSamplesPerSec=12.005632708653149, CurrSamplesPerSec=11.929738322599913, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:03,184] [INFO] [timer.py:197:stop] 0/4204, RunningAvgSamplesPerSec=12.005615136316175, CurrSamplesPerSec=11.93224500206542, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:09,651] [INFO] [timer.py:197:stop] 0/4205, RunningAvgSamplesPerSec=12.005594940825983, CurrSamplesPerSec=11.921329264036865, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:16,124] [INFO] [timer.py:197:stop] 0/4206, RunningAvgSamplesPerSec=12.005571477172916, CurrSamplesPerSec=11.907757411577727, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:22,652] [INFO] [timer.py:197:stop] 0/4207, RunningAvgSamplesPerSec=12.00556191532752, CurrSamplesPerSec=11.96549809413098, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:29,161] [INFO] [timer.py:197:stop] 0/4208, RunningAvgSamplesPerSec=12.005540324050154, CurrSamplesPerSec=11.91543061512299, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:35,670] [INFO] [timer.py:197:stop] 0/4209, RunningAvgSamplesPerSec=12.00552395638011, CurrSamplesPerSec=11.937074136426293, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:42,224] [INFO] [logging.py:68:log_dist] [Rank 0] step=4210, skipped=8, lr=[1.7755555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:16:42,224] [INFO] [timer.py:197:stop] 0/4210, RunningAvgSamplesPerSec=12.00550195515337, CurrSamplesPerSec=11.913651107676747, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:48,595] [INFO] [timer.py:197:stop] 0/4211, RunningAvgSamplesPerSec=12.005500550201676, CurrSamplesPerSec=11.99959142407173, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:16:55,033] [INFO] [timer.py:197:stop] 0/4212, RunningAvgSamplesPerSec=12.005505266198421, CurrSamplesPerSec=12.025387777610781, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:01,411] [INFO] [timer.py:197:stop] 0/4213, RunningAvgSamplesPerSec=12.005509951779842, CurrSamplesPerSec=12.025268722984798, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:07,897] [INFO] [timer.py:197:stop] 0/4214, RunningAvgSamplesPerSec=12.005502462841873, CurrSamplesPerSec=11.97404918578745, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:14,846] [INFO] [timer.py:197:stop] 0/4215, RunningAvgSamplesPerSec=12.005467415002984, CurrSamplesPerSec=11.859639471283693, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:21,916] [INFO] [timer.py:197:stop] 0/4216, RunningAvgSamplesPerSec=12.00543325055334, CurrSamplesPerSec=11.86320403500721, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:28,878] [INFO] [timer.py:197:stop] 0/4217, RunningAvgSamplesPerSec=12.00541793782895, CurrSamplesPerSec=11.941235174516962, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:33,878] [INFO] [timer.py:197:stop] 0/4218, RunningAvgSamplesPerSec=12.006177695011475, CurrSamplesPerSec=16.373793362328524, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:40,786] [INFO] [timer.py:197:stop] 0/4219, RunningAvgSamplesPerSec=12.006175003571903, CurrSamplesPerSec=11.994838610968456, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:47,275] [INFO] [logging.py:68:log_dist] [Rank 0] step=4220, skipped=8, lr=[1.7533333333333336e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:17:47,275] [INFO] [timer.py:197:stop] 0/4220, RunningAvgSamplesPerSec=12.006158370429173, CurrSamplesPerSec=11.936423904431933, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:17:53,907] [INFO] [timer.py:197:stop] 0/4221, RunningAvgSamplesPerSec=12.006125258125355, CurrSamplesPerSec=11.868064012939609, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:00,652] [INFO] [timer.py:197:stop] 0/4222, RunningAvgSamplesPerSec=12.006072500129509, CurrSamplesPerSec=11.787538967512845, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:07,105] [INFO] [timer.py:197:stop] 0/4223, RunningAvgSamplesPerSec=12.006067508767341, CurrSamplesPerSec=11.985040858501348, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:13,619] [INFO] [timer.py:197:stop] 0/4224, RunningAvgSamplesPerSec=12.006064442588787, CurrSamplesPerSec=11.993136042816452, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:20,100] [INFO] [timer.py:197:stop] 0/4225, RunningAvgSamplesPerSec=12.006057330840525, CurrSamplesPerSec=11.976106451203746, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.7422222222222224e-06, 'epoch': 111.18} +[2022-12-20 02:18:26,600] [INFO] [timer.py:197:stop] 0/4226, RunningAvgSamplesPerSec=12.00601898296052, CurrSamplesPerSec=11.846231685834594, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:33,187] [INFO] [timer.py:197:stop] 0/4227, RunningAvgSamplesPerSec=12.005985764735453, CurrSamplesPerSec=11.867293263048508, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:39,836] [INFO] [timer.py:197:stop] 0/4228, RunningAvgSamplesPerSec=12.005962766456971, CurrSamplesPerSec=11.909575316006835, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:46,427] [INFO] [timer.py:197:stop] 0/4229, RunningAvgSamplesPerSec=12.005944905991123, CurrSamplesPerSec=11.930938236793132, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:52,978] [INFO] [logging.py:68:log_dist] [Rank 0] step=4230, skipped=8, lr=[1.7311111111111112e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:18:52,978] [INFO] [timer.py:197:stop] 0/4230, RunningAvgSamplesPerSec=12.005938339082828, CurrSamplesPerSec=11.978244063397595, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:18:59,498] [INFO] [timer.py:197:stop] 0/4231, RunningAvgSamplesPerSec=12.005919130232103, CurrSamplesPerSec=11.925249931818703, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:05,954] [INFO] [timer.py:197:stop] 0/4232, RunningAvgSamplesPerSec=12.005922664685736, CurrSamplesPerSec=12.020888505759995, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:12,450] [INFO] [timer.py:197:stop] 0/4233, RunningAvgSamplesPerSec=12.005903543548607, CurrSamplesPerSec=11.925562509882049, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:18,994] [INFO] [timer.py:197:stop] 0/4234, RunningAvgSamplesPerSec=12.005859404434728, CurrSamplesPerSec=11.821967943533338, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:25,502] [INFO] [timer.py:197:stop] 0/4235, RunningAvgSamplesPerSec=12.00584240446686, CurrSamplesPerSec=11.934327189804273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:32,053] [INFO] [timer.py:197:stop] 0/4236, RunningAvgSamplesPerSec=12.00584271221334, CurrSamplesPerSec=12.007145544463533, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:38,654] [INFO] [timer.py:197:stop] 0/4237, RunningAvgSamplesPerSec=12.005839059339298, CurrSamplesPerSec=11.990392693794696, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:45,179] [INFO] [timer.py:197:stop] 0/4238, RunningAvgSamplesPerSec=12.005825305272568, CurrSamplesPerSec=11.947858137479695, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:51,596] [INFO] [timer.py:197:stop] 0/4239, RunningAvgSamplesPerSec=12.005831694517587, CurrSamplesPerSec=12.03295770122329, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:19:58,126] [INFO] [logging.py:68:log_dist] [Rank 0] step=4240, skipped=8, lr=[1.708888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:19:58,127] [INFO] [timer.py:197:stop] 0/4240, RunningAvgSamplesPerSec=12.005806703671539, CurrSamplesPerSec=11.90084641348374, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:04,602] [INFO] [timer.py:197:stop] 0/4241, RunningAvgSamplesPerSec=12.005781878119093, CurrSamplesPerSec=11.901485387624604, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:11,146] [INFO] [timer.py:197:stop] 0/4242, RunningAvgSamplesPerSec=12.005776387884602, CurrSamplesPerSec=11.982548321926046, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:17,709] [INFO] [timer.py:197:stop] 0/4243, RunningAvgSamplesPerSec=12.005745649476115, CurrSamplesPerSec=11.876814763461603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:24,196] [INFO] [timer.py:197:stop] 0/4244, RunningAvgSamplesPerSec=12.005750352560176, CurrSamplesPerSec=12.025729332018203, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:30,663] [INFO] [timer.py:197:stop] 0/4245, RunningAvgSamplesPerSec=12.005730073807479, CurrSamplesPerSec=11.920319724337718, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:37,070] [INFO] [timer.py:197:stop] 0/4246, RunningAvgSamplesPerSec=12.00573179332644, CurrSamplesPerSec=12.013032149770373, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:43,551] [INFO] [timer.py:197:stop] 0/4247, RunningAvgSamplesPerSec=12.005709699255872, CurrSamplesPerSec=11.912669300983318, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:50,201] [INFO] [timer.py:197:stop] 0/4248, RunningAvgSamplesPerSec=12.00568047357943, CurrSamplesPerSec=11.88288668906469, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:20:56,770] [INFO] [timer.py:197:stop] 0/4249, RunningAvgSamplesPerSec=12.005663258866347, CurrSamplesPerSec=11.93301201140848, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:03,264] [INFO] [logging.py:68:log_dist] [Rank 0] step=4250, skipped=8, lr=[1.6866666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:21:03,265] [INFO] [timer.py:197:stop] 0/4250, RunningAvgSamplesPerSec=12.005646049021054, CurrSamplesPerSec=11.93299821922192, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.6866666666666667e-06, 'epoch': 111.84} +[2022-12-20 02:21:09,730] [INFO] [timer.py:197:stop] 0/4251, RunningAvgSamplesPerSec=12.005632501182545, CurrSamplesPerSec=11.948355913737322, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:16,191] [INFO] [timer.py:197:stop] 0/4252, RunningAvgSamplesPerSec=12.005614477719764, CurrSamplesPerSec=11.9295183026591, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:22,707] [INFO] [timer.py:197:stop] 0/4253, RunningAvgSamplesPerSec=12.005556195286005, CurrSamplesPerSec=11.762864284028465, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:29,186] [INFO] [timer.py:197:stop] 0/4254, RunningAvgSamplesPerSec=12.005531744879333, CurrSamplesPerSec=11.90248540641641, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:35,733] [INFO] [timer.py:197:stop] 0/4255, RunningAvgSamplesPerSec=12.005509456080224, CurrSamplesPerSec=11.911479929699603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:40,480] [INFO] [timer.py:197:stop] 0/4256, RunningAvgSamplesPerSec=12.00629059496938, CurrSamplesPerSec=16.599818922617402, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:46,910] [INFO] [timer.py:197:stop] 0/4257, RunningAvgSamplesPerSec=12.006269816527695, CurrSamplesPerSec=11.918524468442525, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:21:53,398] [INFO] [timer.py:197:stop] 0/4258, RunningAvgSamplesPerSec=12.006274288520219, CurrSamplesPerSec=12.025332829028939, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:00,037] [INFO] [timer.py:197:stop] 0/4259, RunningAvgSamplesPerSec=12.006260725247685, CurrSamplesPerSec=11.94881171320301, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:06,552] [INFO] [logging.py:68:log_dist] [Rank 0] step=4260, skipped=8, lr=[1.6644444444444447e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:22:06,553] [INFO] [timer.py:197:stop] 0/4260, RunningAvgSamplesPerSec=12.00627451548505, CurrSamplesPerSec=12.065268074487886, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:13,158] [INFO] [timer.py:197:stop] 0/4261, RunningAvgSamplesPerSec=12.006256601742123, CurrSamplesPerSec=11.930461529654703, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:19,727] [INFO] [timer.py:197:stop] 0/4262, RunningAvgSamplesPerSec=12.006238646544537, CurrSamplesPerSec=11.930251557283288, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:26,192] [INFO] [timer.py:197:stop] 0/4263, RunningAvgSamplesPerSec=12.006235089403981, CurrSamplesPerSec=11.99110077658031, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:32,672] [INFO] [timer.py:197:stop] 0/4264, RunningAvgSamplesPerSec=12.006233437245314, CurrSamplesPerSec=11.99919771551863, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:39,088] [INFO] [timer.py:197:stop] 0/4265, RunningAvgSamplesPerSec=12.00622951731537, CurrSamplesPerSec=11.989545996569943, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:45,784] [INFO] [timer.py:197:stop] 0/4266, RunningAvgSamplesPerSec=12.006197444145837, CurrSamplesPerSec=11.871009419931905, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:52,477] [INFO] [timer.py:197:stop] 0/4267, RunningAvgSamplesPerSec=12.006175575090978, CurrSamplesPerSec=11.913644762687488, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:22:59,052] [INFO] [timer.py:197:stop] 0/4268, RunningAvgSamplesPerSec=12.006151569659092, CurrSamplesPerSec=11.904634300154065, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:05,587] [INFO] [timer.py:197:stop] 0/4269, RunningAvgSamplesPerSec=12.006118739223636, CurrSamplesPerSec=11.86767941249483, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:12,080] [INFO] [logging.py:68:log_dist] [Rank 0] step=4270, skipped=8, lr=[1.6422222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:23:12,081] [INFO] [timer.py:197:stop] 0/4270, RunningAvgSamplesPerSec=12.00610000490313, CurrSamplesPerSec=11.926689516322877, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:18,529] [INFO] [timer.py:197:stop] 0/4271, RunningAvgSamplesPerSec=12.006099125758146, CurrSamplesPerSec=12.002348107521291, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:25,084] [INFO] [timer.py:197:stop] 0/4272, RunningAvgSamplesPerSec=12.00606348494076, CurrSamplesPerSec=11.855817323689873, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:31,581] [INFO] [timer.py:197:stop] 0/4273, RunningAvgSamplesPerSec=12.006042734570693, CurrSamplesPerSec=11.918087909240564, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:38,132] [INFO] [timer.py:197:stop] 0/4274, RunningAvgSamplesPerSec=12.006020716144759, CurrSamplesPerSec=11.91271106544682, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:44,749] [INFO] [timer.py:197:stop] 0/4275, RunningAvgSamplesPerSec=12.005991647341743, CurrSamplesPerSec=11.883081320448532, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.6311111111111114e-06, 'epoch': 112.5} +[2022-12-20 02:23:51,417] [INFO] [timer.py:197:stop] 0/4276, RunningAvgSamplesPerSec=12.005958233287629, CurrSamplesPerSec=11.864858372237105, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:23:57,874] [INFO] [timer.py:197:stop] 0/4277, RunningAvgSamplesPerSec=12.005962261778707, CurrSamplesPerSec=12.023204765935002, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:04,396] [INFO] [timer.py:197:stop] 0/4278, RunningAvgSamplesPerSec=12.00593030511897, CurrSamplesPerSec=11.870852980596814, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:10,864] [INFO] [timer.py:197:stop] 0/4279, RunningAvgSamplesPerSec=12.005908835801216, CurrSamplesPerSec=11.914802835170978, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:17,379] [INFO] [logging.py:68:log_dist] [Rank 0] step=4280, skipped=8, lr=[1.6200000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:24:17,380] [INFO] [timer.py:197:stop] 0/4280, RunningAvgSamplesPerSec=12.005892093836819, CurrSamplesPerSec=11.934711346624047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:23,864] [INFO] [timer.py:197:stop] 0/4281, RunningAvgSamplesPerSec=12.00589811658156, CurrSamplesPerSec=12.031718844216499, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:30,489] [INFO] [timer.py:197:stop] 0/4282, RunningAvgSamplesPerSec=12.005876342498343, CurrSamplesPerSec=11.91342269232111, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:37,095] [INFO] [timer.py:197:stop] 0/4283, RunningAvgSamplesPerSec=12.005836870124053, CurrSamplesPerSec=11.83923994709388, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:43,539] [INFO] [timer.py:197:stop] 0/4284, RunningAvgSamplesPerSec=12.005807770432657, CurrSamplesPerSec=11.882511647108043, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:50,038] [INFO] [timer.py:197:stop] 0/4285, RunningAvgSamplesPerSec=12.005780145153667, CurrSamplesPerSec=11.888643106582842, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:24:56,525] [INFO] [timer.py:197:stop] 0/4286, RunningAvgSamplesPerSec=12.005760076110835, CurrSamplesPerSec=11.920415535916211, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:03,060] [INFO] [timer.py:197:stop] 0/4287, RunningAvgSamplesPerSec=12.00574226695231, CurrSamplesPerSec=11.929929720040555, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:09,712] [INFO] [timer.py:197:stop] 0/4288, RunningAvgSamplesPerSec=12.005727804516768, CurrSamplesPerSec=11.944074585966, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:16,173] [INFO] [timer.py:197:stop] 0/4289, RunningAvgSamplesPerSec=12.005717604781873, CurrSamplesPerSec=11.962160182381913, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:22,650] [INFO] [logging.py:68:log_dist] [Rank 0] step=4290, skipped=8, lr=[1.5977777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:25:22,651] [INFO] [timer.py:197:stop] 0/4290, RunningAvgSamplesPerSec=12.005671929131472, CurrSamplesPerSec=11.813003554082636, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:29,027] [INFO] [timer.py:197:stop] 0/4291, RunningAvgSamplesPerSec=12.005654685453848, CurrSamplesPerSec=11.932166503213725, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:35,544] [INFO] [timer.py:197:stop] 0/4292, RunningAvgSamplesPerSec=12.005634185759938, CurrSamplesPerSec=11.91835037009049, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:42,064] [INFO] [timer.py:197:stop] 0/4293, RunningAvgSamplesPerSec=12.005607158143578, CurrSamplesPerSec=11.890768042599118, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:46,694] [INFO] [timer.py:197:stop] 0/4294, RunningAvgSamplesPerSec=12.006388698589964, CurrSamplesPerSec=16.660159635878365, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:53,121] [INFO] [timer.py:197:stop] 0/4295, RunningAvgSamplesPerSec=12.00636215512231, CurrSamplesPerSec=11.893508672363957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:25:59,605] [INFO] [timer.py:197:stop] 0/4296, RunningAvgSamplesPerSec=12.006354658324156, CurrSamplesPerSec=11.974256963678677, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:06,067] [INFO] [timer.py:197:stop] 0/4297, RunningAvgSamplesPerSec=12.006357292820208, CurrSamplesPerSec=12.017680490202878, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:12,517] [INFO] [timer.py:197:stop] 0/4298, RunningAvgSamplesPerSec=12.006355276001157, CurrSamplesPerSec=11.997699284671329, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:19,076] [INFO] [timer.py:197:stop] 0/4299, RunningAvgSamplesPerSec=12.006316823661335, CurrSamplesPerSec=11.843368056054969, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:25,504] [INFO] [logging.py:68:log_dist] [Rank 0] step=4300, skipped=8, lr=[1.5755555555555558e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:26:25,505] [INFO] [timer.py:197:stop] 0/4300, RunningAvgSamplesPerSec=12.006304426554891, CurrSamplesPerSec=11.953269424146708, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.5755555555555558e-06, 'epoch': 113.16} +[2022-12-20 02:26:31,979] [INFO] [timer.py:197:stop] 0/4301, RunningAvgSamplesPerSec=12.006295156713673, CurrSamplesPerSec=11.966585183677754, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:38,366] [INFO] [timer.py:197:stop] 0/4302, RunningAvgSamplesPerSec=12.006296391882692, CurrSamplesPerSec=12.01160873352347, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:44,768] [INFO] [timer.py:197:stop] 0/4303, RunningAvgSamplesPerSec=12.00629343681623, CurrSamplesPerSec=11.99360008807251, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:51,193] [INFO] [timer.py:197:stop] 0/4304, RunningAvgSamplesPerSec=12.006294378717222, CurrSamplesPerSec=12.010346862577096, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:26:57,692] [INFO] [timer.py:197:stop] 0/4305, RunningAvgSamplesPerSec=12.006261558236119, CurrSamplesPerSec=11.866709364673811, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:04,240] [INFO] [timer.py:197:stop] 0/4306, RunningAvgSamplesPerSec=12.006220692219438, CurrSamplesPerSec=11.832913123506424, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:10,715] [INFO] [timer.py:197:stop] 0/4307, RunningAvgSamplesPerSec=12.00619176625617, CurrSamplesPerSec=11.88297243129404, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:17,238] [INFO] [timer.py:197:stop] 0/4308, RunningAvgSamplesPerSec=12.00616642725346, CurrSamplesPerSec=11.898064430855868, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:23,704] [INFO] [timer.py:197:stop] 0/4309, RunningAvgSamplesPerSec=12.006142591405725, CurrSamplesPerSec=11.904375610606268, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:30,164] [INFO] [logging.py:68:log_dist] [Rank 0] step=4310, skipped=8, lr=[1.5533333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:27:30,164] [INFO] [timer.py:197:stop] 0/4310, RunningAvgSamplesPerSec=12.006143436228383, CurrSamplesPerSec=12.0097831907524, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:36,641] [INFO] [timer.py:197:stop] 0/4311, RunningAvgSamplesPerSec=12.006141128873699, CurrSamplesPerSec=11.996209269556847, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:43,165] [INFO] [timer.py:197:stop] 0/4312, RunningAvgSamplesPerSec=12.006103870322702, CurrSamplesPerSec=11.847675785538609, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:49,700] [INFO] [timer.py:197:stop] 0/4313, RunningAvgSamplesPerSec=12.006082177813742, CurrSamplesPerSec=11.913310074084876, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:27:56,245] [INFO] [timer.py:197:stop] 0/4314, RunningAvgSamplesPerSec=12.006046414082354, CurrSamplesPerSec=11.853824205835753, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:02,777] [INFO] [timer.py:197:stop] 0/4315, RunningAvgSamplesPerSec=12.006014468584434, CurrSamplesPerSec=11.869828352407744, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:09,259] [INFO] [timer.py:197:stop] 0/4316, RunningAvgSamplesPerSec=12.00600277306501, CurrSamplesPerSec=11.955771093258509, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:15,753] [INFO] [timer.py:197:stop] 0/4317, RunningAvgSamplesPerSec=12.005976615474173, CurrSamplesPerSec=11.894183750260096, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:22,212] [INFO] [timer.py:197:stop] 0/4318, RunningAvgSamplesPerSec=12.005959514453622, CurrSamplesPerSec=11.932619477019339, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:28,662] [INFO] [timer.py:197:stop] 0/4319, RunningAvgSamplesPerSec=12.005944762717588, CurrSamplesPerSec=11.942612205463744, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:35,193] [INFO] [logging.py:68:log_dist] [Rank 0] step=4320, skipped=8, lr=[1.5311111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:28:35,193] [INFO] [timer.py:197:stop] 0/4320, RunningAvgSamplesPerSec=12.005922632685184, CurrSamplesPerSec=11.911141663029234, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:41,676] [INFO] [timer.py:197:stop] 0/4321, RunningAvgSamplesPerSec=12.005906508916318, CurrSamplesPerSec=11.93668558111663, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:48,181] [INFO] [timer.py:197:stop] 0/4322, RunningAvgSamplesPerSec=12.005901052609785, CurrSamplesPerSec=11.982381440867817, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:28:54,616] [INFO] [timer.py:197:stop] 0/4323, RunningAvgSamplesPerSec=12.005897081494746, CurrSamplesPerSec=11.988766348279826, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:01,112] [INFO] [timer.py:197:stop] 0/4324, RunningAvgSamplesPerSec=12.005872588665751, CurrSamplesPerSec=11.900964072436619, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:07,545] [INFO] [timer.py:197:stop] 0/4325, RunningAvgSamplesPerSec=12.005846388185985, CurrSamplesPerSec=11.89366623638313, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.52e-06, 'epoch': 113.82} +[2022-12-20 02:29:14,027] [INFO] [timer.py:197:stop] 0/4326, RunningAvgSamplesPerSec=12.0058442659321, CurrSamplesPerSec=11.996676769537542, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:20,471] [INFO] [timer.py:197:stop] 0/4327, RunningAvgSamplesPerSec=12.005847561856209, CurrSamplesPerSec=12.020116079102873, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:26,923] [INFO] [timer.py:197:stop] 0/4328, RunningAvgSamplesPerSec=12.005825172726905, CurrSamplesPerSec=11.909767123818863, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:33,463] [INFO] [timer.py:197:stop] 0/4329, RunningAvgSamplesPerSec=12.005802625508082, CurrSamplesPerSec=11.909049593486557, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:39,932] [INFO] [logging.py:68:log_dist] [Rank 0] step=4330, skipped=8, lr=[1.5088888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:29:39,933] [INFO] [timer.py:197:stop] 0/4330, RunningAvgSamplesPerSec=12.005795491637176, CurrSamplesPerSec=11.975006412741436, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:46,349] [INFO] [timer.py:197:stop] 0/4331, RunningAvgSamplesPerSec=12.005794593474944, CurrSamplesPerSec=12.001908605823537, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:50,982] [INFO] [timer.py:197:stop] 0/4332, RunningAvgSamplesPerSec=12.006567376732368, CurrSamplesPerSec=16.64451035918809, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:29:57,384] [INFO] [timer.py:197:stop] 0/4333, RunningAvgSamplesPerSec=12.006550774055427, CurrSamplesPerSec=11.93508916032443, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:03,841] [INFO] [timer.py:197:stop] 0/4334, RunningAvgSamplesPerSec=12.006527610068078, CurrSamplesPerSec=11.90703589807646, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:10,318] [INFO] [timer.py:197:stop] 0/4335, RunningAvgSamplesPerSec=12.006514584590926, CurrSamplesPerSec=11.950352221715107, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:16,863] [INFO] [timer.py:197:stop] 0/4336, RunningAvgSamplesPerSec=12.00648481832809, CurrSamplesPerSec=11.878878700540895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:23,329] [INFO] [timer.py:197:stop] 0/4337, RunningAvgSamplesPerSec=12.006459212205256, CurrSamplesPerSec=11.896498884961337, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:29,737] [INFO] [timer.py:197:stop] 0/4338, RunningAvgSamplesPerSec=12.006441244493315, CurrSamplesPerSec=11.92905337155222, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:36,181] [INFO] [timer.py:197:stop] 0/4339, RunningAvgSamplesPerSec=12.006426331887102, CurrSamplesPerSec=11.94211171998148, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:42,648] [INFO] [logging.py:68:log_dist] [Rank 0] step=4340, skipped=8, lr=[1.486666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:30:42,649] [INFO] [timer.py:197:stop] 0/4340, RunningAvgSamplesPerSec=12.006432569253533, CurrSamplesPerSec=12.033545128328832, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:49,090] [INFO] [timer.py:197:stop] 0/4341, RunningAvgSamplesPerSec=12.00643638055112, CurrSamplesPerSec=12.022992593399461, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:30:55,580] [INFO] [timer.py:197:stop] 0/4342, RunningAvgSamplesPerSec=12.006420869489135, CurrSamplesPerSec=11.939493621509778, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:02,061] [INFO] [timer.py:197:stop] 0/4343, RunningAvgSamplesPerSec=12.00638089135416, CurrSamplesPerSec=11.835347964635325, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:08,657] [INFO] [timer.py:197:stop] 0/4344, RunningAvgSamplesPerSec=12.006353710424298, CurrSamplesPerSec=11.889509841300438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:15,172] [INFO] [timer.py:197:stop] 0/4345, RunningAvgSamplesPerSec=12.006339875838819, CurrSamplesPerSec=11.946569217374764, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:21,640] [INFO] [timer.py:197:stop] 0/4346, RunningAvgSamplesPerSec=12.006345985081886, CurrSamplesPerSec=12.032937204343298, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:28,132] [INFO] [timer.py:197:stop] 0/4347, RunningAvgSamplesPerSec=12.006330780730988, CurrSamplesPerSec=11.940644508576156, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:34,565] [INFO] [timer.py:197:stop] 0/4348, RunningAvgSamplesPerSec=12.006334574771998, CurrSamplesPerSec=12.022842353924629, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:41,055] [INFO] [timer.py:197:stop] 0/4349, RunningAvgSamplesPerSec=12.0063041695869, CurrSamplesPerSec=11.875602068576967, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:31:47,804] [INFO] [logging.py:68:log_dist] [Rank 0] step=4350, skipped=8, lr=[1.4644444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:31:47,805] [INFO] [timer.py:197:stop] 0/4350, RunningAvgSamplesPerSec=12.00627729549142, CurrSamplesPerSec=11.890581586588523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.4644444444444445e-06, 'epoch': 114.47} +[2022-12-20 02:31:54,806] [INFO] [timer.py:197:stop] 0/4351, RunningAvgSamplesPerSec=12.006249710424147, CurrSamplesPerSec=11.887496430679365, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:01,814] [INFO] [timer.py:197:stop] 0/4352, RunningAvgSamplesPerSec=12.00623609593982, CurrSamplesPerSec=11.947317331442903, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:08,670] [INFO] [timer.py:197:stop] 0/4353, RunningAvgSamplesPerSec=12.006203190641648, CurrSamplesPerSec=11.864751914463433, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:15,366] [INFO] [timer.py:197:stop] 0/4354, RunningAvgSamplesPerSec=12.006195976324598, CurrSamplesPerSec=11.974888353599404, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:22,221] [INFO] [timer.py:197:stop] 0/4355, RunningAvgSamplesPerSec=12.00617514740299, CurrSamplesPerSec=11.916207101716209, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:28,806] [INFO] [timer.py:197:stop] 0/4356, RunningAvgSamplesPerSec=12.006161079253998, CurrSamplesPerSec=11.945233266622708, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:35,412] [INFO] [timer.py:197:stop] 0/4357, RunningAvgSamplesPerSec=12.006134085426195, CurrSamplesPerSec=11.889742606898984, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:42,119] [INFO] [timer.py:197:stop] 0/4358, RunningAvgSamplesPerSec=12.006105177934597, CurrSamplesPerSec=11.88151971476434, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:48,642] [INFO] [timer.py:197:stop] 0/4359, RunningAvgSamplesPerSec=12.006094356259334, CurrSamplesPerSec=11.959139538620846, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:32:55,075] [INFO] [logging.py:68:log_dist] [Rank 0] step=4360, skipped=8, lr=[1.4422222222222223e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:32:55,076] [INFO] [timer.py:197:stop] 0/4360, RunningAvgSamplesPerSec=12.006077298190796, CurrSamplesPerSec=11.932212647683857, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:01,578] [INFO] [timer.py:197:stop] 0/4361, RunningAvgSamplesPerSec=12.00607432415816, CurrSamplesPerSec=11.993127469569895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:08,149] [INFO] [timer.py:197:stop] 0/4362, RunningAvgSamplesPerSec=12.006057199619379, CurrSamplesPerSec=11.931872672086417, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:14,787] [INFO] [timer.py:197:stop] 0/4363, RunningAvgSamplesPerSec=12.006038971586722, CurrSamplesPerSec=11.927087490026713, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:21,369] [INFO] [timer.py:197:stop] 0/4364, RunningAvgSamplesPerSec=12.006035580247664, CurrSamplesPerSec=11.991264150972054, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:27,813] [INFO] [timer.py:197:stop] 0/4365, RunningAvgSamplesPerSec=12.006033948977455, CurrSamplesPerSec=11.998922563979468, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:34,238] [INFO] [timer.py:197:stop] 0/4366, RunningAvgSamplesPerSec=12.006040674032398, CurrSamplesPerSec=12.035453988067015, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:40,666] [INFO] [timer.py:197:stop] 0/4367, RunningAvgSamplesPerSec=12.006014470642421, CurrSamplesPerSec=11.892741990649393, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:47,287] [INFO] [timer.py:197:stop] 0/4368, RunningAvgSamplesPerSec=12.00594274347164, CurrSamplesPerSec=11.700812602150643, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:53,835] [INFO] [timer.py:197:stop] 0/4369, RunningAvgSamplesPerSec=12.00594133860369, CurrSamplesPerSec=11.999810817840022, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:33:58,559] [INFO] [logging.py:68:log_dist] [Rank 0] step=4370, skipped=8, lr=[1.42e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:33:58,560] [INFO] [timer.py:197:stop] 0/4370, RunningAvgSamplesPerSec=12.006707877637767, CurrSamplesPerSec=16.648658183133254, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:04,984] [INFO] [timer.py:197:stop] 0/4371, RunningAvgSamplesPerSec=12.00668984601082, CurrSamplesPerSec=11.92844111734467, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:11,466] [INFO] [timer.py:197:stop] 0/4372, RunningAvgSamplesPerSec=12.006692942718098, CurrSamplesPerSec=12.020237722987043, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:18,041] [INFO] [timer.py:197:stop] 0/4373, RunningAvgSamplesPerSec=12.006678954219849, CurrSamplesPerSec=11.94585894133351, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:24,719] [INFO] [timer.py:197:stop] 0/4374, RunningAvgSamplesPerSec=12.006659711100784, CurrSamplesPerSec=11.923133308918716, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:31,279] [INFO] [timer.py:197:stop] 0/4375, RunningAvgSamplesPerSec=12.006622199238135, CurrSamplesPerSec=11.844830795951774, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.4088888888888892e-06, 'epoch': 115.13} +[2022-12-20 02:34:37,790] [INFO] [timer.py:197:stop] 0/4376, RunningAvgSamplesPerSec=12.00659814171682, CurrSamplesPerSec=11.902308609863617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:44,254] [INFO] [timer.py:197:stop] 0/4377, RunningAvgSamplesPerSec=12.00657810601309, CurrSamplesPerSec=11.919577104504366, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:50,789] [INFO] [timer.py:197:stop] 0/4378, RunningAvgSamplesPerSec=12.006546742307107, CurrSamplesPerSec=11.870881328358363, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:34:57,345] [INFO] [timer.py:197:stop] 0/4379, RunningAvgSamplesPerSec=12.006525163465083, CurrSamplesPerSec=11.91283318853283, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:03,879] [INFO] [logging.py:68:log_dist] [Rank 0] step=4380, skipped=8, lr=[1.397777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:35:03,880] [INFO] [timer.py:197:stop] 0/4380, RunningAvgSamplesPerSec=12.006506316843401, CurrSamplesPerSec=11.924577679605997, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:10,482] [INFO] [timer.py:197:stop] 0/4381, RunningAvgSamplesPerSec=12.00649427671514, CurrSamplesPerSec=11.954013054249044, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:17,097] [INFO] [timer.py:197:stop] 0/4382, RunningAvgSamplesPerSec=12.006498533990518, CurrSamplesPerSec=12.025170141074154, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:23,626] [INFO] [timer.py:197:stop] 0/4383, RunningAvgSamplesPerSec=12.006500395949697, CurrSamplesPerSec=12.014661321713891, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:30,239] [INFO] [timer.py:197:stop] 0/4384, RunningAvgSamplesPerSec=12.006481188061917, CurrSamplesPerSec=11.922917239544763, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:36,730] [INFO] [timer.py:197:stop] 0/4385, RunningAvgSamplesPerSec=12.006465072667945, CurrSamplesPerSec=11.936260428674812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:43,157] [INFO] [timer.py:197:stop] 0/4386, RunningAvgSamplesPerSec=12.00644437477929, CurrSamplesPerSec=11.91640600012119, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:49,623] [INFO] [timer.py:197:stop] 0/4387, RunningAvgSamplesPerSec=12.006438304162378, CurrSamplesPerSec=11.979883594475771, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:35:56,171] [INFO] [timer.py:197:stop] 0/4388, RunningAvgSamplesPerSec=12.006439711342118, CurrSamplesPerSec=12.01261336804896, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:02,795] [INFO] [timer.py:197:stop] 0/4389, RunningAvgSamplesPerSec=12.00644079385828, CurrSamplesPerSec=12.01119058847955, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:09,408] [INFO] [logging.py:68:log_dist] [Rank 0] step=4390, skipped=8, lr=[1.3755555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:36:09,409] [INFO] [timer.py:197:stop] 0/4390, RunningAvgSamplesPerSec=12.006433106634903, CurrSamplesPerSec=11.972803737875903, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:15,948] [INFO] [timer.py:197:stop] 0/4391, RunningAvgSamplesPerSec=12.006372129326905, CurrSamplesPerSec=11.744637918067994, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:22,464] [INFO] [timer.py:197:stop] 0/4392, RunningAvgSamplesPerSec=12.006338855384488, CurrSamplesPerSec=11.862054922367713, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:28,977] [INFO] [timer.py:197:stop] 0/4393, RunningAvgSamplesPerSec=12.006325325628929, CurrSamplesPerSec=11.947222150393078, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:35,596] [INFO] [timer.py:197:stop] 0/4394, RunningAvgSamplesPerSec=12.006293339812856, CurrSamplesPerSec=11.867467971573829, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:42,171] [INFO] [timer.py:197:stop] 0/4395, RunningAvgSamplesPerSec=12.006289712905412, CurrSamplesPerSec=11.990381446558326, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:48,721] [INFO] [timer.py:197:stop] 0/4396, RunningAvgSamplesPerSec=12.006294444704725, CurrSamplesPerSec=12.027117298413899, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:36:55,219] [INFO] [timer.py:197:stop] 0/4397, RunningAvgSamplesPerSec=12.006273490168342, CurrSamplesPerSec=11.914900144664356, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:01,692] [INFO] [timer.py:197:stop] 0/4398, RunningAvgSamplesPerSec=12.006254926073582, CurrSamplesPerSec=11.925216555807307, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:08,184] [INFO] [timer.py:197:stop] 0/4399, RunningAvgSamplesPerSec=12.006226761408383, CurrSamplesPerSec=11.88367893124057, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:14,747] [INFO] [logging.py:68:log_dist] [Rank 0] step=4400, skipped=8, lr=[1.3533333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:37:14,747] [INFO] [timer.py:197:stop] 0/4400, RunningAvgSamplesPerSec=12.006207734228976, CurrSamplesPerSec=11.92312430587178, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.3533333333333334e-06, 'epoch': 115.79} +[2022-12-20 02:37:21,324] [INFO] [timer.py:197:stop] 0/4401, RunningAvgSamplesPerSec=12.006189108588615, CurrSamplesPerSec=11.924828772037252, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:27,942] [INFO] [timer.py:197:stop] 0/4402, RunningAvgSamplesPerSec=12.006169811687124, CurrSamplesPerSec=11.921878838901975, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:34,507] [INFO] [timer.py:197:stop] 0/4403, RunningAvgSamplesPerSec=12.006171796389467, CurrSamplesPerSec=12.014910844486538, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:41,136] [INFO] [timer.py:197:stop] 0/4404, RunningAvgSamplesPerSec=12.006167193174607, CurrSamplesPerSec=11.98594257857603, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:47,641] [INFO] [timer.py:197:stop] 0/4405, RunningAvgSamplesPerSec=12.006163533102987, CurrSamplesPerSec=11.990073494713116, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:37:54,089] [INFO] [timer.py:197:stop] 0/4406, RunningAvgSamplesPerSec=12.006144307659065, CurrSamplesPerSec=11.92208745765753, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:00,555] [INFO] [timer.py:197:stop] 0/4407, RunningAvgSamplesPerSec=12.006119932982239, CurrSamplesPerSec=11.899725340611244, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:05,280] [INFO] [timer.py:197:stop] 0/4408, RunningAvgSamplesPerSec=12.006881235191226, CurrSamplesPerSec=16.660463635955136, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:11,772] [INFO] [timer.py:197:stop] 0/4409, RunningAvgSamplesPerSec=12.006855538043743, CurrSamplesPerSec=11.894691821946335, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:18,311] [INFO] [logging.py:68:log_dist] [Rank 0] step=4410, skipped=8, lr=[1.3311111111111113e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:38:18,312] [INFO] [timer.py:197:stop] 0/4410, RunningAvgSamplesPerSec=12.00685380077625, CurrSamplesPerSec=11.999202542851252, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:24,857] [INFO] [timer.py:197:stop] 0/4411, RunningAvgSamplesPerSec=12.006852990409007, CurrSamplesPerSec=12.003281954237496, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:31,481] [INFO] [timer.py:197:stop] 0/4412, RunningAvgSamplesPerSec=12.006826968610214, CurrSamplesPerSec=11.893183018157615, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:37,949] [INFO] [timer.py:197:stop] 0/4413, RunningAvgSamplesPerSec=12.006830533316373, CurrSamplesPerSec=12.02257150156237, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:44,406] [INFO] [timer.py:197:stop] 0/4414, RunningAvgSamplesPerSec=12.006811367132377, CurrSamplesPerSec=11.922860575588459, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:50,926] [INFO] [timer.py:197:stop] 0/4415, RunningAvgSamplesPerSec=12.0067860243228, CurrSamplesPerSec=11.896005420033063, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:38:57,378] [INFO] [timer.py:197:stop] 0/4416, RunningAvgSamplesPerSec=12.006782114528892, CurrSamplesPerSec=11.989552958172126, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:03,967] [INFO] [timer.py:197:stop] 0/4417, RunningAvgSamplesPerSec=12.006782132251717, CurrSamplesPerSec=12.006860361303296, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:10,606] [INFO] [timer.py:197:stop] 0/4418, RunningAvgSamplesPerSec=12.006754711188213, CurrSamplesPerSec=11.886899488638573, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:17,103] [INFO] [timer.py:197:stop] 0/4419, RunningAvgSamplesPerSec=12.006753251026979, CurrSamplesPerSec=12.000308640788704, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:23,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=4420, skipped=8, lr=[1.308888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:39:23,533] [INFO] [timer.py:197:stop] 0/4420, RunningAvgSamplesPerSec=12.00673686269824, CurrSamplesPerSec=11.934783511437784, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:30,013] [INFO] [timer.py:197:stop] 0/4421, RunningAvgSamplesPerSec=12.00672582127184, CurrSamplesPerSec=11.958142229906972, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:36,508] [INFO] [timer.py:197:stop] 0/4422, RunningAvgSamplesPerSec=12.006701927329173, CurrSamplesPerSec=11.902035245234671, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:43,083] [INFO] [timer.py:197:stop] 0/4423, RunningAvgSamplesPerSec=12.006701331738133, CurrSamplesPerSec=12.004069396537114, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:49,725] [INFO] [timer.py:197:stop] 0/4424, RunningAvgSamplesPerSec=12.006671066853565, CurrSamplesPerSec=11.874344973520257, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:39:56,225] [INFO] [timer.py:197:stop] 0/4425, RunningAvgSamplesPerSec=12.006670668047965, CurrSamplesPerSec=12.004907408728355, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.2977777777777779e-06, 'epoch': 116.45} +[2022-12-20 02:40:02,818] [INFO] [timer.py:197:stop] 0/4426, RunningAvgSamplesPerSec=12.006645100537673, CurrSamplesPerSec=11.894615397601886, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:09,249] [INFO] [timer.py:197:stop] 0/4427, RunningAvgSamplesPerSec=12.006623945094086, CurrSamplesPerSec=11.91375632973401, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:15,778] [INFO] [timer.py:197:stop] 0/4428, RunningAvgSamplesPerSec=12.006568296211082, CurrSamplesPerSec=11.765271926842647, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:22,321] [INFO] [timer.py:197:stop] 0/4429, RunningAvgSamplesPerSec=12.00654515654871, CurrSamplesPerSec=11.904995428446945, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:28,868] [INFO] [logging.py:68:log_dist] [Rank 0] step=4430, skipped=8, lr=[1.286666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:40:28,868] [INFO] [timer.py:197:stop] 0/4430, RunningAvgSamplesPerSec=12.006522298243839, CurrSamplesPerSec=11.9061745275849, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:35,296] [INFO] [timer.py:197:stop] 0/4431, RunningAvgSamplesPerSec=12.006522457749995, CurrSamplesPerSec=12.007228792573176, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:41,745] [INFO] [timer.py:197:stop] 0/4432, RunningAvgSamplesPerSec=12.006501727549098, CurrSamplesPerSec=11.915384600360895, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:48,201] [INFO] [timer.py:197:stop] 0/4433, RunningAvgSamplesPerSec=12.006478531945925, CurrSamplesPerSec=11.904594176140597, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:40:54,652] [INFO] [timer.py:197:stop] 0/4434, RunningAvgSamplesPerSec=12.006477223077004, CurrSamplesPerSec=12.000680425599507, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:01,151] [INFO] [timer.py:197:stop] 0/4435, RunningAvgSamplesPerSec=12.006455899323647, CurrSamplesPerSec=11.91268727552665, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:07,678] [INFO] [timer.py:197:stop] 0/4436, RunningAvgSamplesPerSec=12.006435952030264, CurrSamplesPerSec=11.918656236230309, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:14,172] [INFO] [timer.py:197:stop] 0/4437, RunningAvgSamplesPerSec=12.006412269463, CurrSamplesPerSec=11.902314415040161, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:20,655] [INFO] [timer.py:197:stop] 0/4438, RunningAvgSamplesPerSec=12.006388767995103, CurrSamplesPerSec=11.903056996583157, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:27,145] [INFO] [timer.py:197:stop] 0/4439, RunningAvgSamplesPerSec=12.006384486311447, CurrSamplesPerSec=11.987420943822192, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:33,574] [INFO] [logging.py:68:log_dist] [Rank 0] step=4440, skipped=8, lr=[1.2644444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:41:33,575] [INFO] [timer.py:197:stop] 0/4440, RunningAvgSamplesPerSec=12.006385508682811, CurrSamplesPerSec=12.010923485354805, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:40,088] [INFO] [timer.py:197:stop] 0/4441, RunningAvgSamplesPerSec=12.00636200660861, CurrSamplesPerSec=11.902958297037578, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:46,556] [INFO] [timer.py:197:stop] 0/4442, RunningAvgSamplesPerSec=12.006341780806963, CurrSamplesPerSec=11.917225997164483, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:53,039] [INFO] [timer.py:197:stop] 0/4443, RunningAvgSamplesPerSec=12.006319032232026, CurrSamplesPerSec=11.906158156943066, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:41:59,509] [INFO] [timer.py:197:stop] 0/4444, RunningAvgSamplesPerSec=12.006296423099014, CurrSamplesPerSec=11.906722177900726, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:05,982] [INFO] [timer.py:197:stop] 0/4445, RunningAvgSamplesPerSec=12.006283326878373, CurrSamplesPerSec=11.948390483073183, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:10,612] [INFO] [timer.py:197:stop] 0/4446, RunningAvgSamplesPerSec=12.007027459504016, CurrSamplesPerSec=16.569894515441206, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:17,044] [INFO] [timer.py:197:stop] 0/4447, RunningAvgSamplesPerSec=12.00702990149686, CurrSamplesPerSec=12.01789193724767, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:23,619] [INFO] [timer.py:197:stop] 0/4448, RunningAvgSamplesPerSec=12.006996915035675, CurrSamplesPerSec=11.862141412605926, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:30,130] [INFO] [timer.py:197:stop] 0/4449, RunningAvgSamplesPerSec=12.006970083666486, CurrSamplesPerSec=11.888851616878416, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:36,570] [INFO] [logging.py:68:log_dist] [Rank 0] step=4450, skipped=8, lr=[1.2422222222222224e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:42:36,571] [INFO] [timer.py:197:stop] 0/4450, RunningAvgSamplesPerSec=12.006962118698233, CurrSamplesPerSec=11.971646109581751, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.2422222222222224e-06, 'epoch': 117.11} +[2022-12-20 02:42:43,090] [INFO] [timer.py:197:stop] 0/4451, RunningAvgSamplesPerSec=12.00693558923738, CurrSamplesPerSec=11.890081239190325, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:49,616] [INFO] [timer.py:197:stop] 0/4452, RunningAvgSamplesPerSec=12.00691102104234, CurrSamplesPerSec=11.898593403234393, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:42:56,099] [INFO] [timer.py:197:stop] 0/4453, RunningAvgSamplesPerSec=12.00687082562642, CurrSamplesPerSec=11.830627361323057, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:02,571] [INFO] [timer.py:197:stop] 0/4454, RunningAvgSamplesPerSec=12.006852809118085, CurrSamplesPerSec=11.927193479512797, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:08,983] [INFO] [timer.py:197:stop] 0/4455, RunningAvgSamplesPerSec=12.006856907892507, CurrSamplesPerSec=12.025132432573967, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:15,404] [INFO] [timer.py:197:stop] 0/4456, RunningAvgSamplesPerSec=12.006855371055616, CurrSamplesPerSec=12.000015735646855, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:21,850] [INFO] [timer.py:197:stop] 0/4457, RunningAvgSamplesPerSec=12.006856410843637, CurrSamplesPerSec=12.011489414102883, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:28,333] [INFO] [timer.py:197:stop] 0/4458, RunningAvgSamplesPerSec=12.006837768141057, CurrSamplesPerSec=11.924355201477216, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:34,855] [INFO] [timer.py:197:stop] 0/4459, RunningAvgSamplesPerSec=12.00683605312155, CurrSamplesPerSec=11.999198788258878, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:41,298] [INFO] [logging.py:68:log_dist] [Rank 0] step=4460, skipped=8, lr=[1.2200000000000002e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:43:41,299] [INFO] [timer.py:197:stop] 0/4460, RunningAvgSamplesPerSec=12.006812328653439, CurrSamplesPerSec=11.901995666454537, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:47,781] [INFO] [timer.py:197:stop] 0/4461, RunningAvgSamplesPerSec=12.006811971354606, CurrSamplesPerSec=12.00521934449069, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:43:54,255] [INFO] [timer.py:197:stop] 0/4462, RunningAvgSamplesPerSec=12.006788758044078, CurrSamplesPerSec=11.904165499615338, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:00,619] [INFO] [timer.py:197:stop] 0/4463, RunningAvgSamplesPerSec=12.006788971771314, CurrSamplesPerSec=12.007742270947455, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:07,057] [INFO] [timer.py:197:stop] 0/4464, RunningAvgSamplesPerSec=12.006787020717848, CurrSamplesPerSec=11.998089677271222, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:13,538] [INFO] [timer.py:197:stop] 0/4465, RunningAvgSamplesPerSec=12.006785054895978, CurrSamplesPerSec=11.99801996244365, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:19,991] [INFO] [timer.py:197:stop] 0/4466, RunningAvgSamplesPerSec=12.006784482383644, CurrSamplesPerSec=12.004229903586658, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:26,413] [INFO] [timer.py:197:stop] 0/4467, RunningAvgSamplesPerSec=12.006764809634259, CurrSamplesPerSec=11.919583455814115, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:32,899] [INFO] [timer.py:197:stop] 0/4468, RunningAvgSamplesPerSec=12.006749762352205, CurrSamplesPerSec=11.939937592264867, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:39,372] [INFO] [timer.py:197:stop] 0/4469, RunningAvgSamplesPerSec=12.006722709597812, CurrSamplesPerSec=11.887108990568798, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:45,885] [INFO] [logging.py:68:log_dist] [Rank 0] step=4470, skipped=8, lr=[1.1977777777777778e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:44:45,885] [INFO] [timer.py:197:stop] 0/4470, RunningAvgSamplesPerSec=12.00671433499111, CurrSamplesPerSec=11.969421187268608, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:52,305] [INFO] [timer.py:197:stop] 0/4471, RunningAvgSamplesPerSec=12.006691654509767, CurrSamplesPerSec=11.906203572382818, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:44:58,795] [INFO] [timer.py:197:stop] 0/4472, RunningAvgSamplesPerSec=12.006691851801465, CurrSamplesPerSec=12.00757361316529, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:05,279] [INFO] [timer.py:197:stop] 0/4473, RunningAvgSamplesPerSec=12.006653243975826, CurrSamplesPerSec=11.836522180075248, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:11,753] [INFO] [timer.py:197:stop] 0/4474, RunningAvgSamplesPerSec=12.006625996463773, CurrSamplesPerSec=11.886026292544217, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:18,306] [INFO] [timer.py:197:stop] 0/4475, RunningAvgSamplesPerSec=12.006586057278847, CurrSamplesPerSec=11.830596598474019, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.1866666666666668e-06, 'epoch': 117.76} +[2022-12-20 02:45:24,822] [INFO] [timer.py:197:stop] 0/4476, RunningAvgSamplesPerSec=12.0065472758059, CurrSamplesPerSec=11.835548869422286, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:31,344] [INFO] [timer.py:197:stop] 0/4477, RunningAvgSamplesPerSec=12.00652920507115, CurrSamplesPerSec=11.926221626483555, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:37,825] [INFO] [timer.py:197:stop] 0/4478, RunningAvgSamplesPerSec=12.006503827615942, CurrSamplesPerSec=11.894004038198814, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:44,259] [INFO] [timer.py:197:stop] 0/4479, RunningAvgSamplesPerSec=12.006506280733726, CurrSamplesPerSec=12.017496488905676, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:50,723] [INFO] [logging.py:68:log_dist] [Rank 0] step=4480, skipped=8, lr=[1.1755555555555556e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:45:50,724] [INFO] [timer.py:197:stop] 0/4480, RunningAvgSamplesPerSec=12.006509422924955, CurrSamplesPerSec=12.020593518567528, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:45:57,153] [INFO] [timer.py:197:stop] 0/4481, RunningAvgSamplesPerSec=12.006509502353698, CurrSamplesPerSec=12.006865194803325, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:03,623] [INFO] [timer.py:197:stop] 0/4482, RunningAvgSamplesPerSec=12.006485076482551, CurrSamplesPerSec=11.898069704525824, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:10,066] [INFO] [timer.py:197:stop] 0/4483, RunningAvgSamplesPerSec=12.006469028865459, CurrSamplesPerSec=11.935003725639124, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:14,771] [INFO] [timer.py:197:stop] 0/4484, RunningAvgSamplesPerSec=12.007194218302322, CurrSamplesPerSec=16.462913262516587, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:21,208] [INFO] [timer.py:197:stop] 0/4485, RunningAvgSamplesPerSec=12.007199410811989, CurrSamplesPerSec=12.030517445150618, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:27,692] [INFO] [timer.py:197:stop] 0/4486, RunningAvgSamplesPerSec=12.00718328852615, CurrSamplesPerSec=11.93533963427729, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:34,724] [INFO] [timer.py:197:stop] 0/4487, RunningAvgSamplesPerSec=12.00715829670455, CurrSamplesPerSec=11.896131418403694, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:41,707] [INFO] [timer.py:197:stop] 0/4488, RunningAvgSamplesPerSec=12.007131002232816, CurrSamplesPerSec=11.885951032099165, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:48,582] [INFO] [timer.py:197:stop] 0/4489, RunningAvgSamplesPerSec=12.00709367060103, CurrSamplesPerSec=11.841928140636703, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:46:55,443] [INFO] [logging.py:68:log_dist] [Rank 0] step=4490, skipped=8, lr=[1.1533333333333334e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:46:55,444] [INFO] [timer.py:197:stop] 0/4490, RunningAvgSamplesPerSec=12.007055617242601, CurrSamplesPerSec=11.838704752153465, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:02,246] [INFO] [timer.py:197:stop] 0/4491, RunningAvgSamplesPerSec=12.007034802020932, CurrSamplesPerSec=11.91433746391995, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:08,728] [INFO] [timer.py:197:stop] 0/4492, RunningAvgSamplesPerSec=12.007032115015528, CurrSamplesPerSec=11.99498225544606, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:15,203] [INFO] [timer.py:197:stop] 0/4493, RunningAvgSamplesPerSec=12.007023188383, CurrSamplesPerSec=11.967075985576317, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:21,745] [INFO] [timer.py:197:stop] 0/4494, RunningAvgSamplesPerSec=12.00699703471592, CurrSamplesPerSec=11.89067902755952, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:28,350] [INFO] [timer.py:197:stop] 0/4495, RunningAvgSamplesPerSec=12.006961125708228, CurrSamplesPerSec=11.84779657884007, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:35,000] [INFO] [timer.py:197:stop] 0/4496, RunningAvgSamplesPerSec=12.006942348726927, CurrSamplesPerSec=11.923166143675695, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:41,691] [INFO] [timer.py:197:stop] 0/4497, RunningAvgSamplesPerSec=12.006903784445774, CurrSamplesPerSec=11.836062381743195, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:48,433] [INFO] [timer.py:197:stop] 0/4498, RunningAvgSamplesPerSec=12.006880792973083, CurrSamplesPerSec=11.904416261076374, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:47:54,950] [INFO] [timer.py:197:stop] 0/4499, RunningAvgSamplesPerSec=12.006859517922253, CurrSamplesPerSec=11.911963050101694, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:01,428] [INFO] [logging.py:68:log_dist] [Rank 0] step=4500, skipped=8, lr=[1.131111111111111e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:48:01,429] [INFO] [timer.py:197:stop] 0/4500, RunningAvgSamplesPerSec=12.006852767312127, CurrSamplesPerSec=11.97657185127812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.131111111111111e-06, 'epoch': 118.42} +[2022-12-20 02:48:07,847] [INFO] [timer.py:197:stop] 0/4501, RunningAvgSamplesPerSec=12.006841384621834, CurrSamplesPerSec=11.95585948803523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:14,258] [INFO] [timer.py:197:stop] 0/4502, RunningAvgSamplesPerSec=12.006835953023023, CurrSamplesPerSec=11.98244883458178, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:20,873] [INFO] [timer.py:197:stop] 0/4503, RunningAvgSamplesPerSec=12.006810365847198, CurrSamplesPerSec=11.892762012694865, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:27,557] [INFO] [timer.py:197:stop] 0/4504, RunningAvgSamplesPerSec=12.00678210037201, CurrSamplesPerSec=11.880893399530617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:34,113] [INFO] [timer.py:197:stop] 0/4505, RunningAvgSamplesPerSec=12.006777209301138, CurrSamplesPerSec=11.984797925671565, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:40,655] [INFO] [timer.py:197:stop] 0/4506, RunningAvgSamplesPerSec=12.00675859256329, CurrSamplesPerSec=11.923508801043445, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:47,113] [INFO] [timer.py:197:stop] 0/4507, RunningAvgSamplesPerSec=12.006735377368361, CurrSamplesPerSec=11.90307705337541, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:48:53,657] [INFO] [timer.py:197:stop] 0/4508, RunningAvgSamplesPerSec=12.006704324082513, CurrSamplesPerSec=11.868420826910981, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:00,182] [INFO] [timer.py:197:stop] 0/4509, RunningAvgSamplesPerSec=12.006709232471888, CurrSamplesPerSec=12.028867260699505, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:06,762] [INFO] [logging.py:68:log_dist] [Rank 0] step=4510, skipped=8, lr=[1.1088888888888889e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:49:06,763] [INFO] [timer.py:197:stop] 0/4510, RunningAvgSamplesPerSec=12.006692157759657, CurrSamplesPerSec=11.93022663681067, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:13,255] [INFO] [timer.py:197:stop] 0/4511, RunningAvgSamplesPerSec=12.0066651311039, CurrSamplesPerSec=11.886053133912531, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:19,850] [INFO] [timer.py:197:stop] 0/4512, RunningAvgSamplesPerSec=12.006619698172113, CurrSamplesPerSec=11.805199996288273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:26,276] [INFO] [timer.py:197:stop] 0/4513, RunningAvgSamplesPerSec=12.006612748656805, CurrSamplesPerSec=11.975352056283144, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:32,789] [INFO] [timer.py:197:stop] 0/4514, RunningAvgSamplesPerSec=12.006618374423198, CurrSamplesPerSec=12.032049972131375, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:39,367] [INFO] [timer.py:197:stop] 0/4515, RunningAvgSamplesPerSec=12.006604615356057, CurrSamplesPerSec=11.9448431169324, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:45,866] [INFO] [timer.py:197:stop] 0/4516, RunningAvgSamplesPerSec=12.00658222614206, CurrSamplesPerSec=11.906383125188396, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:52,331] [INFO] [timer.py:197:stop] 0/4517, RunningAvgSamplesPerSec=12.006568892089119, CurrSamplesPerSec=11.946679275186918, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:49:58,781] [INFO] [timer.py:197:stop] 0/4518, RunningAvgSamplesPerSec=12.006563225782589, CurrSamplesPerSec=11.981034260517905, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:05,252] [INFO] [timer.py:197:stop] 0/4519, RunningAvgSamplesPerSec=12.00654140907714, CurrSamplesPerSec=11.9088192412582, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:11,667] [INFO] [logging.py:68:log_dist] [Rank 0] step=4520, skipped=8, lr=[1.0866666666666667e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:50:11,668] [INFO] [timer.py:197:stop] 0/4520, RunningAvgSamplesPerSec=12.00654406828836, CurrSamplesPerSec=12.018567756830047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:18,280] [INFO] [timer.py:197:stop] 0/4521, RunningAvgSamplesPerSec=12.006525138792718, CurrSamplesPerSec=11.921606692414054, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:22,868] [INFO] [timer.py:197:stop] 0/4522, RunningAvgSamplesPerSec=12.00726219694435, CurrSamplesPerSec=16.617046367197382, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:29,392] [INFO] [timer.py:197:stop] 0/4523, RunningAvgSamplesPerSec=12.007242485192332, CurrSamplesPerSec=11.918801766289405, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:35,937] [INFO] [timer.py:197:stop] 0/4524, RunningAvgSamplesPerSec=12.007242851903326, CurrSamplesPerSec=12.008900981302723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:42,570] [INFO] [timer.py:197:stop] 0/4525, RunningAvgSamplesPerSec=12.0072279378393, CurrSamplesPerSec=11.940163307760562, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.0755555555555557e-06, 'epoch': 119.08} +[2022-12-20 02:50:49,140] [INFO] [timer.py:197:stop] 0/4526, RunningAvgSamplesPerSec=12.007211313247408, CurrSamplesPerSec=11.932486339463379, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:50:55,608] [INFO] [timer.py:197:stop] 0/4527, RunningAvgSamplesPerSec=12.007195183402365, CurrSamplesPerSec=11.934664652209609, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:02,132] [INFO] [timer.py:197:stop] 0/4528, RunningAvgSamplesPerSec=12.007176002578808, CurrSamplesPerSec=11.921005790383301, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:08,620] [INFO] [timer.py:197:stop] 0/4529, RunningAvgSamplesPerSec=12.007165609040534, CurrSamplesPerSec=11.960308071811896, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:15,222] [INFO] [logging.py:68:log_dist] [Rank 0] step=4530, skipped=8, lr=[1.0644444444444445e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:51:15,222] [INFO] [timer.py:197:stop] 0/4530, RunningAvgSamplesPerSec=12.007137334533539, CurrSamplesPerSec=11.880489037956645, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:21,763] [INFO] [timer.py:197:stop] 0/4531, RunningAvgSamplesPerSec=12.007128013287748, CurrSamplesPerSec=11.965069286831199, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:28,281] [INFO] [timer.py:197:stop] 0/4532, RunningAvgSamplesPerSec=12.007099513965292, CurrSamplesPerSec=11.879399134033612, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:34,691] [INFO] [timer.py:197:stop] 0/4533, RunningAvgSamplesPerSec=12.007098873729236, CurrSamplesPerSec=12.004199304921661, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:41,118] [INFO] [timer.py:197:stop] 0/4534, RunningAvgSamplesPerSec=12.007074594510936, CurrSamplesPerSec=11.898064430855868, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:47,695] [INFO] [timer.py:197:stop] 0/4535, RunningAvgSamplesPerSec=12.007044330634653, CurrSamplesPerSec=11.871437811832845, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:51:54,254] [INFO] [timer.py:197:stop] 0/4536, RunningAvgSamplesPerSec=12.007017884047125, CurrSamplesPerSec=11.888320877784274, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:00,825] [INFO] [timer.py:197:stop] 0/4537, RunningAvgSamplesPerSec=12.006998650797906, CurrSamplesPerSec=11.920424005522104, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:07,313] [INFO] [timer.py:197:stop] 0/4538, RunningAvgSamplesPerSec=12.006994134091766, CurrSamplesPerSec=11.986545763221972, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:13,721] [INFO] [timer.py:197:stop] 0/4539, RunningAvgSamplesPerSec=12.006996058030902, CurrSamplesPerSec=12.015729394962602, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:20,197] [INFO] [logging.py:68:log_dist] [Rank 0] step=4540, skipped=8, lr=[1.0422222222222221e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:52:20,198] [INFO] [timer.py:197:stop] 0/4540, RunningAvgSamplesPerSec=12.00697773543422, CurrSamplesPerSec=11.924419825221046, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:26,683] [INFO] [timer.py:197:stop] 0/4541, RunningAvgSamplesPerSec=12.006979627228047, CurrSamplesPerSec=12.015570731590282, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:33,220] [INFO] [timer.py:197:stop] 0/4542, RunningAvgSamplesPerSec=12.006942586695429, CurrSamplesPerSec=11.841137798920014, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:39,735] [INFO] [timer.py:197:stop] 0/4543, RunningAvgSamplesPerSec=12.006916977327094, CurrSamplesPerSec=11.891765734274253, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:46,458] [INFO] [timer.py:197:stop] 0/4544, RunningAvgSamplesPerSec=12.006893189048657, CurrSamplesPerSec=11.899834009489174, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:53,093] [INFO] [timer.py:197:stop] 0/4545, RunningAvgSamplesPerSec=12.006892894083292, CurrSamplesPerSec=12.005553310899092, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:52:59,523] [INFO] [timer.py:197:stop] 0/4546, RunningAvgSamplesPerSec=12.006891974373934, CurrSamplesPerSec=12.002715188552102, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:06,089] [INFO] [timer.py:197:stop] 0/4547, RunningAvgSamplesPerSec=12.006854748870806, CurrSamplesPerSec=11.8400524916115, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:12,756] [INFO] [timer.py:197:stop] 0/4548, RunningAvgSamplesPerSec=12.006831944810742, CurrSamplesPerSec=11.904074700175789, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:19,220] [INFO] [timer.py:197:stop] 0/4549, RunningAvgSamplesPerSec=12.006802303976286, CurrSamplesPerSec=11.873550824160635, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:25,686] [INFO] [logging.py:68:log_dist] [Rank 0] step=4550, skipped=8, lr=[1.02e-06], mom=[[0.9, 0.999]] +[2022-12-20 02:53:25,687] [INFO] [timer.py:197:stop] 0/4550, RunningAvgSamplesPerSec=12.006808356669554, CurrSamplesPerSec=12.034393195888887, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.02e-06, 'epoch': 119.74} +[2022-12-20 02:53:32,295] [INFO] [timer.py:197:stop] 0/4551, RunningAvgSamplesPerSec=12.0068086401097, CurrSamplesPerSec=12.008097864343952, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:38,845] [INFO] [timer.py:197:stop] 0/4552, RunningAvgSamplesPerSec=12.006786858234491, CurrSamplesPerSec=11.908512294913061, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:45,411] [INFO] [timer.py:197:stop] 0/4553, RunningAvgSamplesPerSec=12.006759465961652, CurrSamplesPerSec=11.883405369545013, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:51,928] [INFO] [timer.py:197:stop] 0/4554, RunningAvgSamplesPerSec=12.006739783207395, CurrSamplesPerSec=11.917827047310144, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:53:58,394] [INFO] [timer.py:197:stop] 0/4555, RunningAvgSamplesPerSec=12.006728694541437, CurrSamplesPerSec=11.956464441588158, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:04,911] [INFO] [timer.py:197:stop] 0/4556, RunningAvgSamplesPerSec=12.006701123793741, CurrSamplesPerSec=11.882470620111771, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:11,461] [INFO] [timer.py:197:stop] 0/4557, RunningAvgSamplesPerSec=12.006664095249096, CurrSamplesPerSec=11.840372109091177, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:18,048] [INFO] [timer.py:197:stop] 0/4558, RunningAvgSamplesPerSec=12.006651106687757, CurrSamplesPerSec=11.947778369499446, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:24,726] [INFO] [timer.py:197:stop] 0/4559, RunningAvgSamplesPerSec=12.006631485925938, CurrSamplesPerSec=11.91790006633451, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:29,381] [INFO] [logging.py:68:log_dist] [Rank 0] step=4560, skipped=8, lr=[9.97777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:54:29,382] [INFO] [timer.py:197:stop] 0/4560, RunningAvgSamplesPerSec=12.007344117780661, CurrSamplesPerSec=16.459069389542993, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:36,010] [INFO] [timer.py:197:stop] 0/4561, RunningAvgSamplesPerSec=12.00734261089808, CurrSamplesPerSec=12.00047816751331, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:42,570] [INFO] [timer.py:197:stop] 0/4562, RunningAvgSamplesPerSec=12.007347865406071, CurrSamplesPerSec=12.03135106551913, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:49,058] [INFO] [timer.py:197:stop] 0/4563, RunningAvgSamplesPerSec=12.007333352953518, CurrSamplesPerSec=11.94151937364594, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:54:55,636] [INFO] [timer.py:197:stop] 0/4564, RunningAvgSamplesPerSec=12.007310108551806, CurrSamplesPerSec=11.902220477424395, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:02,069] [INFO] [timer.py:197:stop] 0/4565, RunningAvgSamplesPerSec=12.007293197727606, CurrSamplesPerSec=11.930638633309854, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:08,459] [INFO] [timer.py:197:stop] 0/4566, RunningAvgSamplesPerSec=12.007298510899473, CurrSamplesPerSec=12.031591575143702, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:14,898] [INFO] [timer.py:197:stop] 0/4567, RunningAvgSamplesPerSec=12.007278571041075, CurrSamplesPerSec=11.916957766357438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:21,406] [INFO] [timer.py:197:stop] 0/4568, RunningAvgSamplesPerSec=12.007253013704723, CurrSamplesPerSec=11.891706732009846, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:27,873] [INFO] [timer.py:197:stop] 0/4569, RunningAvgSamplesPerSec=12.007236537610227, CurrSamplesPerSec=11.932475200599354, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:34,425] [INFO] [logging.py:68:log_dist] [Rank 0] step=4570, skipped=8, lr=[9.755555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:55:34,426] [INFO] [timer.py:197:stop] 0/4570, RunningAvgSamplesPerSec=12.007217839885337, CurrSamplesPerSec=11.922428464440369, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:40,906] [INFO] [timer.py:197:stop] 0/4571, RunningAvgSamplesPerSec=12.007200300747012, CurrSamplesPerSec=11.927612686388766, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:47,386] [INFO] [timer.py:197:stop] 0/4572, RunningAvgSamplesPerSec=12.007185959757596, CurrSamplesPerSec=11.942017684397168, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:55:53,850] [INFO] [timer.py:197:stop] 0/4573, RunningAvgSamplesPerSec=12.00718532582527, CurrSamplesPerSec=12.004288954082908, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:00,278] [INFO] [timer.py:197:stop] 0/4574, RunningAvgSamplesPerSec=12.007192352205754, CurrSamplesPerSec=12.039396096826657, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:06,763] [INFO] [timer.py:197:stop] 0/4575, RunningAvgSamplesPerSec=12.00717161895591, CurrSamplesPerSec=11.913121853053477, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 9.644444444444444e-07, 'epoch': 120.39} +[2022-12-20 02:56:13,281] [INFO] [timer.py:197:stop] 0/4576, RunningAvgSamplesPerSec=12.007174330845183, CurrSamplesPerSec=12.01958862525755, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:19,727] [INFO] [timer.py:197:stop] 0/4577, RunningAvgSamplesPerSec=12.00717601046699, CurrSamplesPerSec=12.014863520405651, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:26,198] [INFO] [timer.py:197:stop] 0/4578, RunningAvgSamplesPerSec=12.007166960600811, CurrSamplesPerSec=11.965906129612874, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:32,650] [INFO] [timer.py:197:stop] 0/4579, RunningAvgSamplesPerSec=12.00717238976553, CurrSamplesPerSec=12.032067769424673, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:39,134] [INFO] [logging.py:68:log_dist] [Rank 0] step=4580, skipped=8, lr=[9.533333333333335e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:56:39,135] [INFO] [timer.py:197:stop] 0/4580, RunningAvgSamplesPerSec=12.007160086725449, CurrSamplesPerSec=11.951111983334052, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:45,610] [INFO] [timer.py:197:stop] 0/4581, RunningAvgSamplesPerSec=12.007153797850167, CurrSamplesPerSec=11.97843220980157, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:52,131] [INFO] [timer.py:197:stop] 0/4582, RunningAvgSamplesPerSec=12.007132116753585, CurrSamplesPerSec=11.908668671796743, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:56:58,646] [INFO] [timer.py:197:stop] 0/4583, RunningAvgSamplesPerSec=12.00710126330106, CurrSamplesPerSec=11.86743649218033, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:05,084] [INFO] [timer.py:197:stop] 0/4584, RunningAvgSamplesPerSec=12.007107765597725, CurrSamplesPerSec=12.036968881729443, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:11,579] [INFO] [timer.py:197:stop] 0/4585, RunningAvgSamplesPerSec=12.00707870129704, CurrSamplesPerSec=11.875367229518936, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:18,119] [INFO] [timer.py:197:stop] 0/4586, RunningAvgSamplesPerSec=12.007049366050488, CurrSamplesPerSec=11.874094952079492, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:24,643] [INFO] [timer.py:197:stop] 0/4587, RunningAvgSamplesPerSec=12.007020970642447, CurrSamplesPerSec=11.87825266370254, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:31,106] [INFO] [timer.py:197:stop] 0/4588, RunningAvgSamplesPerSec=12.007017710158063, CurrSamplesPerSec=11.992086982790054, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:37,592] [INFO] [timer.py:197:stop] 0/4589, RunningAvgSamplesPerSec=12.007017647636884, CurrSamplesPerSec=12.006730932360982, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:44,132] [INFO] [logging.py:68:log_dist] [Rank 0] step=4590, skipped=8, lr=[9.311111111111113e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:57:44,133] [INFO] [timer.py:197:stop] 0/4590, RunningAvgSamplesPerSec=12.006973477953133, CurrSamplesPerSec=11.807729918458687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:50,670] [INFO] [timer.py:197:stop] 0/4591, RunningAvgSamplesPerSec=12.006956645834682, CurrSamplesPerSec=11.930224515924191, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:57:57,130] [INFO] [timer.py:197:stop] 0/4592, RunningAvgSamplesPerSec=12.006937195693586, CurrSamplesPerSec=11.91833925760241, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:03,558] [INFO] [timer.py:197:stop] 0/4593, RunningAvgSamplesPerSec=12.006940132508062, CurrSamplesPerSec=12.020435264982451, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:10,034] [INFO] [timer.py:197:stop] 0/4594, RunningAvgSamplesPerSec=12.00693607395291, CurrSamplesPerSec=11.98833212386563, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:16,489] [INFO] [timer.py:197:stop] 0/4595, RunningAvgSamplesPerSec=12.00691250535897, CurrSamplesPerSec=11.899652544123528, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:23,053] [INFO] [timer.py:197:stop] 0/4596, RunningAvgSamplesPerSec=12.006886654860963, CurrSamplesPerSec=11.889318158827043, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:29,558] [INFO] [timer.py:197:stop] 0/4597, RunningAvgSamplesPerSec=12.006863980671698, CurrSamplesPerSec=11.903594858698735, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:34,124] [INFO] [timer.py:197:stop] 0/4598, RunningAvgSamplesPerSec=12.007595206071795, CurrSamplesPerSec=16.67347157567222, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:40,657] [INFO] [timer.py:197:stop] 0/4599, RunningAvgSamplesPerSec=12.007567488234113, CurrSamplesPerSec=11.881513929854682, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:58:47,150] [INFO] [logging.py:68:log_dist] [Rank 0] step=4600, skipped=8, lr=[9.08888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:58:47,151] [INFO] [timer.py:197:stop] 0/4600, RunningAvgSamplesPerSec=12.007549342741417, CurrSamplesPerSec=11.924710111005098, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 9.08888888888889e-07, 'epoch': 121.05} +[2022-12-20 02:58:53,581] [INFO] [timer.py:197:stop] 0/4601, RunningAvgSamplesPerSec=12.007546671427257, CurrSamplesPerSec=11.995276522973239, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:00,126] [INFO] [timer.py:197:stop] 0/4602, RunningAvgSamplesPerSec=12.007521057570864, CurrSamplesPerSec=11.890867593541614, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:06,679] [INFO] [timer.py:197:stop] 0/4603, RunningAvgSamplesPerSec=12.007486615786132, CurrSamplesPerSec=11.851118052096002, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:13,186] [INFO] [timer.py:197:stop] 0/4604, RunningAvgSamplesPerSec=12.007447233230527, CurrSamplesPerSec=11.828942418506202, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:19,727] [INFO] [timer.py:197:stop] 0/4605, RunningAvgSamplesPerSec=12.007418610490896, CurrSamplesPerSec=11.87712638407323, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:26,189] [INFO] [timer.py:197:stop] 0/4606, RunningAvgSamplesPerSec=12.007398778294322, CurrSamplesPerSec=11.916800113647462, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:32,682] [INFO] [timer.py:197:stop] 0/4607, RunningAvgSamplesPerSec=12.00738458670786, CurrSamplesPerSec=11.942400211268925, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:39,189] [INFO] [timer.py:197:stop] 0/4608, RunningAvgSamplesPerSec=12.007363688375023, CurrSamplesPerSec=11.911892218181954, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:45,738] [INFO] [timer.py:197:stop] 0/4609, RunningAvgSamplesPerSec=12.007311109759868, CurrSamplesPerSec=11.76992296191065, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:52,312] [INFO] [logging.py:68:log_dist] [Rank 0] step=4610, skipped=8, lr=[8.866666666666668e-07], mom=[[0.9, 0.999]] +[2022-12-20 02:59:52,313] [INFO] [timer.py:197:stop] 0/4610, RunningAvgSamplesPerSec=12.00731524389846, CurrSamplesPerSec=12.026391485650969, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 02:59:58,684] [INFO] [timer.py:197:stop] 0/4611, RunningAvgSamplesPerSec=12.007317865640653, CurrSamplesPerSec=12.019411023666471, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:05,153] [INFO] [timer.py:197:stop] 0/4612, RunningAvgSamplesPerSec=12.00730309203446, CurrSamplesPerSec=11.939595583164422, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:11,780] [INFO] [timer.py:197:stop] 0/4613, RunningAvgSamplesPerSec=12.007286523378589, CurrSamplesPerSec=11.931387935395357, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:18,312] [INFO] [timer.py:197:stop] 0/4614, RunningAvgSamplesPerSec=12.007268052435679, CurrSamplesPerSec=11.922698530237371, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:24,771] [INFO] [timer.py:197:stop] 0/4615, RunningAvgSamplesPerSec=12.007263553918241, CurrSamplesPerSec=11.986552186100312, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:31,450] [INFO] [timer.py:197:stop] 0/4616, RunningAvgSamplesPerSec=12.007222962013381, CurrSamplesPerSec=11.822848415692116, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:37,937] [INFO] [timer.py:197:stop] 0/4617, RunningAvgSamplesPerSec=12.007198554868866, CurrSamplesPerSec=11.895630603840212, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:44,391] [INFO] [timer.py:197:stop] 0/4618, RunningAvgSamplesPerSec=12.00718069078383, CurrSamplesPerSec=11.925300261078046, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:50,781] [INFO] [timer.py:197:stop] 0/4619, RunningAvgSamplesPerSec=12.007179364366882, CurrSamplesPerSec=12.001059744954834, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:00:57,233] [INFO] [logging.py:68:log_dist] [Rank 0] step=4620, skipped=8, lr=[8.644444444444445e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:00:57,234] [INFO] [timer.py:197:stop] 0/4620, RunningAvgSamplesPerSec=12.007166190477898, CurrSamplesPerSec=11.946648969210244, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:03,696] [INFO] [timer.py:197:stop] 0/4621, RunningAvgSamplesPerSec=12.00715926463347, CurrSamplesPerSec=11.975260701877074, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:10,180] [INFO] [timer.py:197:stop] 0/4622, RunningAvgSamplesPerSec=12.007134027810288, CurrSamplesPerSec=11.891686186715923, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:16,774] [INFO] [timer.py:197:stop] 0/4623, RunningAvgSamplesPerSec=12.007109419986266, CurrSamplesPerSec=11.89448785019356, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:23,241] [INFO] [timer.py:197:stop] 0/4624, RunningAvgSamplesPerSec=12.007110157307316, CurrSamplesPerSec=12.010518285187166, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:30,013] [INFO] [timer.py:197:stop] 0/4625, RunningAvgSamplesPerSec=12.007092424578941, CurrSamplesPerSec=11.925687545695585, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 8.533333333333334e-07, 'epoch': 121.71} +[2022-12-20 03:01:36,492] [INFO] [timer.py:197:stop] 0/4626, RunningAvgSamplesPerSec=12.007093990381925, CurrSamplesPerSec=12.014337065148178, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:42,944] [INFO] [timer.py:197:stop] 0/4627, RunningAvgSamplesPerSec=12.0070751108741, CurrSamplesPerSec=11.920406536973136, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:49,508] [INFO] [timer.py:197:stop] 0/4628, RunningAvgSamplesPerSec=12.007048213105524, CurrSamplesPerSec=11.883921990851828, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:01:56,017] [INFO] [timer.py:197:stop] 0/4629, RunningAvgSamplesPerSec=12.00701708647456, CurrSamplesPerSec=11.864731986619553, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:02,456] [INFO] [logging.py:68:log_dist] [Rank 0] step=4630, skipped=8, lr=[8.422222222222224e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:02:02,457] [INFO] [timer.py:197:stop] 0/4630, RunningAvgSamplesPerSec=12.00701280909004, CurrSamplesPerSec=11.987253927015216, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:08,955] [INFO] [timer.py:197:stop] 0/4631, RunningAvgSamplesPerSec=12.00699516498391, CurrSamplesPerSec=11.925889939730231, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:15,520] [INFO] [timer.py:197:stop] 0/4632, RunningAvgSamplesPerSec=12.006995275529043, CurrSamplesPerSec=12.00750701076006, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:21,992] [INFO] [timer.py:197:stop] 0/4633, RunningAvgSamplesPerSec=12.00697000567237, CurrSamplesPerSec=11.891099885556637, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:28,547] [INFO] [timer.py:197:stop] 0/4634, RunningAvgSamplesPerSec=12.006960823493602, CurrSamplesPerSec=11.964588248555469, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:35,013] [INFO] [timer.py:197:stop] 0/4635, RunningAvgSamplesPerSec=12.006934028174843, CurrSamplesPerSec=11.884088245143264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:39,655] [INFO] [timer.py:197:stop] 0/4636, RunningAvgSamplesPerSec=12.007657629550478, CurrSamplesPerSec=16.65900267582468, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:46,130] [INFO] [timer.py:197:stop] 0/4637, RunningAvgSamplesPerSec=12.007644738243954, CurrSamplesPerSec=11.948202215573538, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:52,620] [INFO] [timer.py:197:stop] 0/4638, RunningAvgSamplesPerSec=12.007622573698736, CurrSamplesPerSec=11.90576157869771, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:02:59,129] [INFO] [timer.py:197:stop] 0/4639, RunningAvgSamplesPerSec=12.007605582554442, CurrSamplesPerSec=11.929348124126692, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:05,564] [INFO] [logging.py:68:log_dist] [Rank 0] step=4640, skipped=8, lr=[8.200000000000001e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:03:05,564] [INFO] [timer.py:197:stop] 0/4640, RunningAvgSamplesPerSec=12.007581930179681, CurrSamplesPerSec=11.898898783578089, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:12,122] [INFO] [timer.py:197:stop] 0/4641, RunningAvgSamplesPerSec=12.007565483903136, CurrSamplesPerSec=11.931769251215911, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:18,610] [INFO] [timer.py:197:stop] 0/4642, RunningAvgSamplesPerSec=12.007536971217027, CurrSamplesPerSec=11.876708091022678, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:25,103] [INFO] [timer.py:197:stop] 0/4643, RunningAvgSamplesPerSec=12.007536528564543, CurrSamplesPerSec=12.005482972377843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:31,761] [INFO] [timer.py:197:stop] 0/4644, RunningAvgSamplesPerSec=12.00751145831812, CurrSamplesPerSec=11.8922772873772, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:38,228] [INFO] [timer.py:197:stop] 0/4645, RunningAvgSamplesPerSec=12.007493065267901, CurrSamplesPerSec=11.92271547595021, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:44,734] [INFO] [timer.py:197:stop] 0/4646, RunningAvgSamplesPerSec=12.007471034924471, CurrSamplesPerSec=11.906048316381579, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:51,345] [INFO] [timer.py:197:stop] 0/4647, RunningAvgSamplesPerSec=12.007474263595746, CurrSamplesPerSec=12.022486963661995, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:03:57,961] [INFO] [timer.py:197:stop] 0/4648, RunningAvgSamplesPerSec=12.00744097602738, CurrSamplesPerSec=11.854786389199438, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:04,493] [INFO] [timer.py:197:stop] 0/4649, RunningAvgSamplesPerSec=12.007416511163415, CurrSamplesPerSec=11.894818846650086, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:11,032] [INFO] [logging.py:68:log_dist] [Rank 0] step=4650, skipped=8, lr=[7.977777777777779e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:04:11,033] [INFO] [timer.py:197:stop] 0/4650, RunningAvgSamplesPerSec=12.007391078284957, CurrSamplesPerSec=11.890356688390131, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 7.977777777777779e-07, 'epoch': 122.37} +[2022-12-20 03:04:17,545] [INFO] [timer.py:197:stop] 0/4651, RunningAvgSamplesPerSec=12.007367891043627, CurrSamplesPerSec=11.900552540073276, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:24,049] [INFO] [timer.py:197:stop] 0/4652, RunningAvgSamplesPerSec=12.007339043029537, CurrSamplesPerSec=11.874706367655874, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:30,530] [INFO] [timer.py:197:stop] 0/4653, RunningAvgSamplesPerSec=12.007322188694227, CurrSamplesPerSec=11.929457864867317, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:37,049] [INFO] [timer.py:197:stop] 0/4654, RunningAvgSamplesPerSec=12.007295219518488, CurrSamplesPerSec=11.88315864874105, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:43,560] [INFO] [timer.py:197:stop] 0/4655, RunningAvgSamplesPerSec=12.007271495458196, CurrSamplesPerSec=11.89791255116723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:50,022] [INFO] [timer.py:197:stop] 0/4656, RunningAvgSamplesPerSec=12.007264949203428, CurrSamplesPerSec=11.976882316215837, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:04:56,477] [INFO] [timer.py:197:stop] 0/4657, RunningAvgSamplesPerSec=12.007262426048356, CurrSamplesPerSec=11.99553113767545, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:02,958] [INFO] [timer.py:197:stop] 0/4658, RunningAvgSamplesPerSec=12.007238018248463, CurrSamplesPerSec=11.89468497006848, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:09,412] [INFO] [timer.py:197:stop] 0/4659, RunningAvgSamplesPerSec=12.007237024665125, CurrSamplesPerSec=12.002612682671375, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:16,035] [INFO] [logging.py:68:log_dist] [Rank 0] step=4660, skipped=8, lr=[7.755555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:05:16,036] [INFO] [timer.py:197:stop] 0/4660, RunningAvgSamplesPerSec=12.007240261921488, CurrSamplesPerSec=12.022335121501582, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:22,541] [INFO] [timer.py:197:stop] 0/4661, RunningAvgSamplesPerSec=12.007242626042464, CurrSamplesPerSec=12.018264812379327, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:29,010] [INFO] [timer.py:197:stop] 0/4662, RunningAvgSamplesPerSec=12.007221787839358, CurrSamplesPerSec=11.910915457913543, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:35,445] [INFO] [timer.py:197:stop] 0/4663, RunningAvgSamplesPerSec=12.007219318693304, CurrSamplesPerSec=11.995724116018723, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:42,181] [INFO] [timer.py:197:stop] 0/4664, RunningAvgSamplesPerSec=12.007207258734885, CurrSamplesPerSec=11.951257774971513, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:48,690] [INFO] [timer.py:197:stop] 0/4665, RunningAvgSamplesPerSec=12.007184618576222, CurrSamplesPerSec=11.90255612650817, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:05:55,200] [INFO] [timer.py:197:stop] 0/4666, RunningAvgSamplesPerSec=12.00717142654244, CurrSamplesPerSec=11.945970580925152, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:01,585] [INFO] [timer.py:197:stop] 0/4667, RunningAvgSamplesPerSec=12.00717364255029, CurrSamplesPerSec=12.01751800923427, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:08,003] [INFO] [timer.py:197:stop] 0/4668, RunningAvgSamplesPerSec=12.00717492214451, CurrSamplesPerSec=12.013147198898732, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:14,643] [INFO] [timer.py:197:stop] 0/4669, RunningAvgSamplesPerSec=12.007181216082797, CurrSamplesPerSec=12.036620751648663, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:21,105] [INFO] [logging.py:68:log_dist] [Rank 0] step=4670, skipped=8, lr=[7.533333333333335e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:06:21,106] [INFO] [timer.py:197:stop] 0/4670, RunningAvgSamplesPerSec=12.00716548529169, CurrSamplesPerSec=11.934196136436437, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:27,576] [INFO] [timer.py:197:stop] 0/4671, RunningAvgSamplesPerSec=12.007168626561281, CurrSamplesPerSec=12.021850006117615, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:34,011] [INFO] [timer.py:197:stop] 0/4672, RunningAvgSamplesPerSec=12.007162645165302, CurrSamplesPerSec=11.979300325437812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:40,502] [INFO] [timer.py:197:stop] 0/4673, RunningAvgSamplesPerSec=12.007151155931998, CurrSamplesPerSec=11.953735180205697, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:45,343] [INFO] [timer.py:197:stop] 0/4674, RunningAvgSamplesPerSec=12.007823851178635, CurrSamplesPerSec=16.263947173630875, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:06:51,845] [INFO] [timer.py:197:stop] 0/4675, RunningAvgSamplesPerSec=12.007794787917016, CurrSamplesPerSec=11.873529816360572, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 7.422222222222223e-07, 'epoch': 123.03} +[2022-12-20 03:06:58,474] [INFO] [timer.py:197:stop] 0/4676, RunningAvgSamplesPerSec=12.00779520866758, CurrSamplesPerSec=12.009761698117066, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:04,963] [INFO] [timer.py:197:stop] 0/4677, RunningAvgSamplesPerSec=12.007798678235309, CurrSamplesPerSec=12.024037373153979, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:11,462] [INFO] [timer.py:197:stop] 0/4678, RunningAvgSamplesPerSec=12.007772800949523, CurrSamplesPerSec=11.888003404727957, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:17,946] [INFO] [timer.py:197:stop] 0/4679, RunningAvgSamplesPerSec=12.00775533227017, CurrSamplesPerSec=11.92662380817271, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:24,376] [INFO] [logging.py:68:log_dist] [Rank 0] step=4680, skipped=8, lr=[7.311111111111112e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:07:24,377] [INFO] [timer.py:197:stop] 0/4680, RunningAvgSamplesPerSec=12.007753982046863, CurrSamplesPerSec=12.001442307713802, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:30,790] [INFO] [timer.py:197:stop] 0/4681, RunningAvgSamplesPerSec=12.007756256543406, CurrSamplesPerSec=12.018405789957388, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:37,434] [INFO] [timer.py:197:stop] 0/4682, RunningAvgSamplesPerSec=12.00776228370734, CurrSamplesPerSec=12.03602978622548, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:43,948] [INFO] [timer.py:197:stop] 0/4683, RunningAvgSamplesPerSec=12.007744755925343, CurrSamplesPerSec=11.92627143407489, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:50,416] [INFO] [timer.py:197:stop] 0/4684, RunningAvgSamplesPerSec=12.00772013892374, CurrSamplesPerSec=11.893583501578284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:07:57,078] [INFO] [timer.py:197:stop] 0/4685, RunningAvgSamplesPerSec=12.00768518324306, CurrSamplesPerSec=11.84622384411087, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:03,575] [INFO] [timer.py:197:stop] 0/4686, RunningAvgSamplesPerSec=12.007662439156517, CurrSamplesPerSec=11.902088545074514, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:10,099] [INFO] [timer.py:197:stop] 0/4687, RunningAvgSamplesPerSec=12.007662479355222, CurrSamplesPerSec=12.0078507730442, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:16,721] [INFO] [timer.py:197:stop] 0/4688, RunningAvgSamplesPerSec=12.007646319548405, CurrSamplesPerSec=11.932412080762692, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:23,177] [INFO] [timer.py:197:stop] 0/4689, RunningAvgSamplesPerSec=12.007623905009734, CurrSamplesPerSec=11.903500373021682, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:29,663] [INFO] [logging.py:68:log_dist] [Rank 0] step=4690, skipped=8, lr=[7.08888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:08:29,664] [INFO] [timer.py:197:stop] 0/4690, RunningAvgSamplesPerSec=12.007603151812326, CurrSamplesPerSec=11.911114708230434, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:36,253] [INFO] [timer.py:197:stop] 0/4691, RunningAvgSamplesPerSec=12.007595867052947, CurrSamplesPerSec=11.973541789367365, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:42,745] [INFO] [timer.py:197:stop] 0/4692, RunningAvgSamplesPerSec=12.007577794564892, CurrSamplesPerSec=11.9234298874775, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:49,360] [INFO] [timer.py:197:stop] 0/4693, RunningAvgSamplesPerSec=12.00757040029824, CurrSamplesPerSec=11.97299117938145, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:08:55,875] [INFO] [timer.py:197:stop] 0/4694, RunningAvgSamplesPerSec=12.0075501971284, CurrSamplesPerSec=11.913519450534425, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:02,321] [INFO] [timer.py:197:stop] 0/4695, RunningAvgSamplesPerSec=12.007548079973766, CurrSamplesPerSec=11.99762260339588, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:08,836] [INFO] [timer.py:197:stop] 0/4696, RunningAvgSamplesPerSec=12.007523313552118, CurrSamplesPerSec=11.892409002303738, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:15,303] [INFO] [timer.py:197:stop] 0/4697, RunningAvgSamplesPerSec=12.00752503685497, CurrSamplesPerSec=12.015619674759906, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:21,804] [INFO] [timer.py:197:stop] 0/4698, RunningAvgSamplesPerSec=12.007495784006828, CurrSamplesPerSec=11.871707147570639, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:28,307] [INFO] [timer.py:197:stop] 0/4699, RunningAvgSamplesPerSec=12.007491889733412, CurrSamplesPerSec=11.989232197359216, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:34,746] [INFO] [logging.py:68:log_dist] [Rank 0] step=4700, skipped=8, lr=[6.866666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:09:34,747] [INFO] [timer.py:197:stop] 0/4700, RunningAvgSamplesPerSec=12.007471972972619, CurrSamplesPerSec=11.914646297147183, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 6.866666666666667e-07, 'epoch': 123.68} +[2022-12-20 03:09:41,476] [INFO] [timer.py:197:stop] 0/4701, RunningAvgSamplesPerSec=12.00745478801676, CurrSamplesPerSec=11.927259193940188, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:47,978] [INFO] [timer.py:197:stop] 0/4702, RunningAvgSamplesPerSec=12.007436482726803, CurrSamplesPerSec=11.922031860575064, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:09:54,430] [INFO] [timer.py:197:stop] 0/4703, RunningAvgSamplesPerSec=12.007423154505744, CurrSamplesPerSec=11.945105694437386, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:01,167] [INFO] [timer.py:197:stop] 0/4704, RunningAvgSamplesPerSec=12.00742297552288, CurrSamplesPerSec=12.00658163605102, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:07,764] [INFO] [timer.py:197:stop] 0/4705, RunningAvgSamplesPerSec=12.007425020300213, CurrSamplesPerSec=12.017047269643397, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:14,230] [INFO] [timer.py:197:stop] 0/4706, RunningAvgSamplesPerSec=12.00741884582983, CurrSamplesPerSec=11.978450383346798, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:20,710] [INFO] [timer.py:197:stop] 0/4707, RunningAvgSamplesPerSec=12.007386245070105, CurrSamplesPerSec=11.855966559636975, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:27,236] [INFO] [timer.py:197:stop] 0/4708, RunningAvgSamplesPerSec=12.007346685297767, CurrSamplesPerSec=11.824059734800755, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:33,718] [INFO] [timer.py:197:stop] 0/4709, RunningAvgSamplesPerSec=12.007318915343086, CurrSamplesPerSec=11.878040845859678, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:40,182] [INFO] [logging.py:68:log_dist] [Rank 0] step=4710, skipped=8, lr=[6.644444444444446e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:10:40,183] [INFO] [timer.py:197:stop] 0/4710, RunningAvgSamplesPerSec=12.007312355105018, CurrSamplesPerSec=11.976512538809773, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:46,654] [INFO] [timer.py:197:stop] 0/4711, RunningAvgSamplesPerSec=12.00729836763905, CurrSamplesPerSec=11.941804648805617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:51,299] [INFO] [timer.py:197:stop] 0/4712, RunningAvgSamplesPerSec=12.007980411455224, CurrSamplesPerSec=16.392759680847522, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:10:57,814] [INFO] [timer.py:197:stop] 0/4713, RunningAvgSamplesPerSec=12.007942948382679, CurrSamplesPerSec=11.834047727675808, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:04,347] [INFO] [timer.py:197:stop] 0/4714, RunningAvgSamplesPerSec=12.007920930244438, CurrSamplesPerSec=11.905082018019181, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:10,768] [INFO] [timer.py:197:stop] 0/4715, RunningAvgSamplesPerSec=12.0079128404449, CurrSamplesPerSec=11.96991435685414, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:17,324] [INFO] [timer.py:197:stop] 0/4716, RunningAvgSamplesPerSec=12.00791268427385, CurrSamplesPerSec=12.00717669523392, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:23,858] [INFO] [timer.py:197:stop] 0/4717, RunningAvgSamplesPerSec=12.00787323869034, CurrSamplesPerSec=11.82476289283951, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:30,415] [INFO] [timer.py:197:stop] 0/4718, RunningAvgSamplesPerSec=12.007846822030581, CurrSamplesPerSec=11.884571250769142, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:36,999] [INFO] [timer.py:197:stop] 0/4719, RunningAvgSamplesPerSec=12.007823852593255, CurrSamplesPerSec=11.900468654276002, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:43,631] [INFO] [logging.py:68:log_dist] [Rank 0] step=4720, skipped=8, lr=[6.422222222222223e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:11:43,632] [INFO] [timer.py:197:stop] 0/4720, RunningAvgSamplesPerSec=12.007797212358168, CurrSamplesPerSec=11.883436933713257, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:50,121] [INFO] [timer.py:197:stop] 0/4721, RunningAvgSamplesPerSec=12.007774280090256, CurrSamplesPerSec=11.900546209028448, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:11:56,558] [INFO] [timer.py:197:stop] 0/4722, RunningAvgSamplesPerSec=12.007771408616044, CurrSamplesPerSec=11.994236199212247, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:03,071] [INFO] [timer.py:197:stop] 0/4723, RunningAvgSamplesPerSec=12.007757178403729, CurrSamplesPerSec=11.94096426831486, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:09,556] [INFO] [timer.py:197:stop] 0/4724, RunningAvgSamplesPerSec=12.00773821514169, CurrSamplesPerSec=11.918875326471442, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:16,055] [INFO] [timer.py:197:stop] 0/4725, RunningAvgSamplesPerSec=12.007720399854575, CurrSamplesPerSec=11.924181992110379, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 6.311111111111112e-07, 'epoch': 124.34} +[2022-12-20 03:12:22,435] [INFO] [timer.py:197:stop] 0/4726, RunningAvgSamplesPerSec=12.00770470054889, CurrSamplesPerSec=11.934012029886928, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:28,972] [INFO] [timer.py:197:stop] 0/4727, RunningAvgSamplesPerSec=12.007661149333394, CurrSamplesPerSec=11.805391571905437, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:35,527] [INFO] [timer.py:197:stop] 0/4728, RunningAvgSamplesPerSec=12.007633973114041, CurrSamplesPerSec=11.88058526194735, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:42,025] [INFO] [timer.py:197:stop] 0/4729, RunningAvgSamplesPerSec=12.007611766472156, CurrSamplesPerSec=11.903572688740384, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:48,532] [INFO] [logging.py:68:log_dist] [Rank 0] step=4730, skipped=8, lr=[6.200000000000001e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:12:48,533] [INFO] [timer.py:197:stop] 0/4730, RunningAvgSamplesPerSec=12.007591902736403, CurrSamplesPerSec=11.914424718260408, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:12:55,014] [INFO] [timer.py:197:stop] 0/4731, RunningAvgSamplesPerSec=12.007577148680163, CurrSamplesPerSec=11.938222964274829, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:01,457] [INFO] [timer.py:197:stop] 0/4732, RunningAvgSamplesPerSec=12.007564122856538, CurrSamplesPerSec=11.946279461334903, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:07,992] [INFO] [timer.py:197:stop] 0/4733, RunningAvgSamplesPerSec=12.00753112935926, CurrSamplesPerSec=11.853474550034816, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:14,475] [INFO] [timer.py:197:stop] 0/4734, RunningAvgSamplesPerSec=12.00750879905668, CurrSamplesPerSec=11.90278570871771, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:20,939] [INFO] [timer.py:197:stop] 0/4735, RunningAvgSamplesPerSec=12.007498983686508, CurrSamplesPerSec=11.961231656984355, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:27,496] [INFO] [timer.py:197:stop] 0/4736, RunningAvgSamplesPerSec=12.0074777534793, CurrSamplesPerSec=11.907829250797484, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:33,948] [INFO] [timer.py:197:stop] 0/4737, RunningAvgSamplesPerSec=12.007458069568477, CurrSamplesPerSec=11.91499216706057, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:40,398] [INFO] [timer.py:197:stop] 0/4738, RunningAvgSamplesPerSec=12.00745839778998, CurrSamplesPerSec=12.009012727827386, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:46,898] [INFO] [timer.py:197:stop] 0/4739, RunningAvgSamplesPerSec=12.007429556748079, CurrSamplesPerSec=11.872375026918293, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:53,384] [INFO] [logging.py:68:log_dist] [Rank 0] step=4740, skipped=8, lr=[5.977777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:13:53,384] [INFO] [timer.py:197:stop] 0/4740, RunningAvgSamplesPerSec=12.007420655528287, CurrSamplesPerSec=11.96540315653249, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:13:59,995] [INFO] [timer.py:197:stop] 0/4741, RunningAvgSamplesPerSec=12.007392289623912, CurrSamplesPerSec=11.874482594856357, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:06,525] [INFO] [timer.py:197:stop] 0/4742, RunningAvgSamplesPerSec=12.007396197852424, CurrSamplesPerSec=12.025945911268217, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:13,087] [INFO] [timer.py:197:stop] 0/4743, RunningAvgSamplesPerSec=12.007368642321252, CurrSamplesPerSec=11.878161207925546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:19,553] [INFO] [timer.py:197:stop] 0/4744, RunningAvgSamplesPerSec=12.007366063016685, CurrSamplesPerSec=11.995150023687739, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:25,962] [INFO] [timer.py:197:stop] 0/4745, RunningAvgSamplesPerSec=12.007376610650931, CurrSamplesPerSec=12.057602754016742, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:32,425] [INFO] [timer.py:197:stop] 0/4746, RunningAvgSamplesPerSec=12.007357411088886, CurrSamplesPerSec=11.916979457157392, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:38,961] [INFO] [timer.py:197:stop] 0/4747, RunningAvgSamplesPerSec=12.0073348378314, CurrSamplesPerSec=11.901194120877284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:45,408] [INFO] [timer.py:197:stop] 0/4748, RunningAvgSamplesPerSec=12.007335449457143, CurrSamplesPerSec=12.010238315366887, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:51,895] [INFO] [timer.py:197:stop] 0/4749, RunningAvgSamplesPerSec=12.007318431775184, CurrSamplesPerSec=11.92709225951311, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:14:56,512] [INFO] [logging.py:68:log_dist] [Rank 0] step=4750, skipped=8, lr=[5.755555555555555e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:14:56,512] [INFO] [timer.py:197:stop] 0/4750, RunningAvgSamplesPerSec=12.008026070968972, CurrSamplesPerSec=16.67224856504998, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 5.755555555555555e-07, 'epoch': 125.0} +[2022-12-20 03:15:03,022] [INFO] [timer.py:197:stop] 0/4751, RunningAvgSamplesPerSec=12.0079948879271, CurrSamplesPerSec=11.861741471345702, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:09,517] [INFO] [timer.py:197:stop] 0/4752, RunningAvgSamplesPerSec=12.007973982351334, CurrSamplesPerSec=11.909507682769604, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:15,977] [INFO] [timer.py:197:stop] 0/4753, RunningAvgSamplesPerSec=12.00797437621492, CurrSamplesPerSec=12.009845519829891, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:22,454] [INFO] [timer.py:197:stop] 0/4754, RunningAvgSamplesPerSec=12.007976966348263, CurrSamplesPerSec=12.020295316304264, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:28,915] [INFO] [timer.py:197:stop] 0/4755, RunningAvgSamplesPerSec=12.007958128996734, CurrSamplesPerSec=11.919105538663752, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:35,344] [INFO] [timer.py:197:stop] 0/4756, RunningAvgSamplesPerSec=12.007941822684163, CurrSamplesPerSec=11.930935055084172, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:41,890] [INFO] [timer.py:197:stop] 0/4757, RunningAvgSamplesPerSec=12.007919569886115, CurrSamplesPerSec=11.9030538297274, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:48,327] [INFO] [timer.py:197:stop] 0/4758, RunningAvgSamplesPerSec=12.007904071829321, CurrSamplesPerSec=11.93466040728096, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:15:55,159] [INFO] [timer.py:197:stop] 0/4759, RunningAvgSamplesPerSec=12.007876095420489, CurrSamplesPerSec=11.876278793240989, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:02,125] [INFO] [logging.py:68:log_dist] [Rank 0] step=4760, skipped=8, lr=[5.533333333333334e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:16:02,125] [INFO] [timer.py:197:stop] 0/4760, RunningAvgSamplesPerSec=12.007848702364806, CurrSamplesPerSec=11.878939152595663, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:09,104] [INFO] [timer.py:197:stop] 0/4761, RunningAvgSamplesPerSec=12.007838504752206, CurrSamplesPerSec=11.959513572277269, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:15,898] [INFO] [timer.py:197:stop] 0/4762, RunningAvgSamplesPerSec=12.007810322517404, CurrSamplesPerSec=11.875172850747113, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:22,602] [INFO] [timer.py:197:stop] 0/4763, RunningAvgSamplesPerSec=12.007790860639044, CurrSamplesPerSec=11.915861690211294, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:29,039] [INFO] [timer.py:197:stop] 0/4764, RunningAvgSamplesPerSec=12.007792851000586, CurrSamplesPerSec=12.017276447968888, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:35,514] [INFO] [timer.py:197:stop] 0/4765, RunningAvgSamplesPerSec=12.007797488781604, CurrSamplesPerSec=12.029923305027758, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:42,015] [INFO] [timer.py:197:stop] 0/4766, RunningAvgSamplesPerSec=12.007784811024974, CurrSamplesPerSec=11.947702856792892, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:48,426] [INFO] [timer.py:197:stop] 0/4767, RunningAvgSamplesPerSec=12.007782102655176, CurrSamplesPerSec=11.994893281222316, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:16:55,100] [INFO] [timer.py:197:stop] 0/4768, RunningAvgSamplesPerSec=12.007754442133137, CurrSamplesPerSec=11.877383365143023, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:01,716] [INFO] [timer.py:197:stop] 0/4769, RunningAvgSamplesPerSec=12.007735230290173, CurrSamplesPerSec=11.916864655527794, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:08,213] [INFO] [logging.py:68:log_dist] [Rank 0] step=4770, skipped=8, lr=[5.311111111111111e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:17:08,213] [INFO] [timer.py:197:stop] 0/4770, RunningAvgSamplesPerSec=12.007730112326943, CurrSamplesPerSec=11.983382261990528, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:14,681] [INFO] [timer.py:197:stop] 0/4771, RunningAvgSamplesPerSec=12.007715145110183, CurrSamplesPerSec=11.936773163096994, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:21,147] [INFO] [timer.py:197:stop] 0/4772, RunningAvgSamplesPerSec=12.007690549534663, CurrSamplesPerSec=11.891529201941909, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:27,669] [INFO] [timer.py:197:stop] 0/4773, RunningAvgSamplesPerSec=12.007694298780661, CurrSamplesPerSec=12.025604883291777, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:34,231] [INFO] [timer.py:197:stop] 0/4774, RunningAvgSamplesPerSec=12.00767820238549, CurrSamplesPerSec=11.931370434697888, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:40,925] [INFO] [timer.py:197:stop] 0/4775, RunningAvgSamplesPerSec=12.007628551714367, CurrSamplesPerSec=11.775281159117167, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 5.2e-07, 'epoch': 125.66} +[2022-12-20 03:17:47,538] [INFO] [timer.py:197:stop] 0/4776, RunningAvgSamplesPerSec=12.007623436629926, CurrSamplesPerSec=11.983258688177019, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:17:53,957] [INFO] [timer.py:197:stop] 0/4777, RunningAvgSamplesPerSec=12.00762596681897, CurrSamplesPerSec=12.019717255141579, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:00,411] [INFO] [timer.py:197:stop] 0/4778, RunningAvgSamplesPerSec=12.0076288642664, CurrSamplesPerSec=12.021480138673013, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:06,857] [INFO] [timer.py:197:stop] 0/4779, RunningAvgSamplesPerSec=12.00760722295331, CurrSamplesPerSec=11.905130593208764, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:13,485] [INFO] [logging.py:68:log_dist] [Rank 0] step=4780, skipped=8, lr=[5.088888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:18:13,485] [INFO] [timer.py:197:stop] 0/4780, RunningAvgSamplesPerSec=12.007574948543922, CurrSamplesPerSec=11.855354977922318, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:20,178] [INFO] [timer.py:197:stop] 0/4781, RunningAvgSamplesPerSec=12.007557643136009, CurrSamplesPerSec=11.92543800648651, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:26,784] [INFO] [timer.py:197:stop] 0/4782, RunningAvgSamplesPerSec=12.007525906724045, CurrSamplesPerSec=11.857749827104685, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:33,248] [INFO] [timer.py:197:stop] 0/4783, RunningAvgSamplesPerSec=12.00750039127333, CurrSamplesPerSec=11.886763158434274, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:39,831] [INFO] [timer.py:197:stop] 0/4784, RunningAvgSamplesPerSec=12.00747217319631, CurrSamplesPerSec=11.874060811313228, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:46,328] [INFO] [timer.py:197:stop] 0/4785, RunningAvgSamplesPerSec=12.007442290597565, CurrSamplesPerSec=11.866224662502662, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:52,937] [INFO] [timer.py:197:stop] 0/4786, RunningAvgSamplesPerSec=12.00744052888745, CurrSamplesPerSec=11.999020179650419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:18:59,604] [INFO] [timer.py:197:stop] 0/4787, RunningAvgSamplesPerSec=12.007437229329895, CurrSamplesPerSec=11.991672874277619, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:04,235] [INFO] [timer.py:197:stop] 0/4788, RunningAvgSamplesPerSec=12.00813406670647, CurrSamplesPerSec=16.624668046009813, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:10,867] [INFO] [timer.py:197:stop] 0/4789, RunningAvgSamplesPerSec=12.008115036064677, CurrSamplesPerSec=11.917720166003813, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:17,461] [INFO] [logging.py:68:log_dist] [Rank 0] step=4790, skipped=8, lr=[4.866666666666666e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:19:17,462] [INFO] [timer.py:197:stop] 0/4790, RunningAvgSamplesPerSec=12.008112765110903, CurrSamplesPerSec=11.997251544218647, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:23,983] [INFO] [timer.py:197:stop] 0/4791, RunningAvgSamplesPerSec=12.008110085360798, CurrSamplesPerSec=11.995293139603062, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:30,430] [INFO] [timer.py:197:stop] 0/4792, RunningAvgSamplesPerSec=12.008111549309566, CurrSamplesPerSec=12.01512649643875, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:36,963] [INFO] [timer.py:197:stop] 0/4793, RunningAvgSamplesPerSec=12.008103959404645, CurrSamplesPerSec=11.971858075510415, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:43,431] [INFO] [timer.py:197:stop] 0/4794, RunningAvgSamplesPerSec=12.008086690470938, CurrSamplesPerSec=11.92591749128188, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:49,927] [INFO] [timer.py:197:stop] 0/4795, RunningAvgSamplesPerSec=12.008061010696723, CurrSamplesPerSec=11.886252079597968, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:19:56,405] [INFO] [timer.py:197:stop] 0/4796, RunningAvgSamplesPerSec=12.00806195881122, CurrSamplesPerSec=12.012607992350178, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:02,963] [INFO] [timer.py:197:stop] 0/4797, RunningAvgSamplesPerSec=12.008035178233037, CurrSamplesPerSec=11.881007509141613, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:09,464] [INFO] [timer.py:197:stop] 0/4798, RunningAvgSamplesPerSec=12.008030951702304, CurrSamplesPerSec=11.987798890060368, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:16,031] [INFO] [timer.py:197:stop] 0/4799, RunningAvgSamplesPerSec=12.007995799742163, CurrSamplesPerSec=11.841741646002601, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:22,537] [INFO] [logging.py:68:log_dist] [Rank 0] step=4800, skipped=8, lr=[4.6444444444444446e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:20:22,538] [INFO] [timer.py:197:stop] 0/4800, RunningAvgSamplesPerSec=12.007973833558056, CurrSamplesPerSec=11.903518847756764, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 4.6444444444444446e-07, 'epoch': 126.32} +[2022-12-20 03:20:29,069] [INFO] [timer.py:197:stop] 0/4801, RunningAvgSamplesPerSec=12.00794699119364, CurrSamplesPerSec=11.880524267324288, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:35,590] [INFO] [timer.py:197:stop] 0/4802, RunningAvgSamplesPerSec=12.007926846313742, CurrSamplesPerSec=11.912023839361217, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:42,180] [INFO] [timer.py:197:stop] 0/4803, RunningAvgSamplesPerSec=12.007921593222838, CurrSamplesPerSec=11.982759604292617, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:48,777] [INFO] [timer.py:197:stop] 0/4804, RunningAvgSamplesPerSec=12.007911099690824, CurrSamplesPerSec=11.957742181119473, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:20:55,276] [INFO] [timer.py:197:stop] 0/4805, RunningAvgSamplesPerSec=12.007889083842851, CurrSamplesPerSec=11.903091832107693, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:01,823] [INFO] [timer.py:197:stop] 0/4806, RunningAvgSamplesPerSec=12.007867585986336, CurrSamplesPerSec=11.905493863714826, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:08,319] [INFO] [timer.py:197:stop] 0/4807, RunningAvgSamplesPerSec=12.007851597037602, CurrSamplesPerSec=11.931529003498223, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:14,799] [INFO] [timer.py:197:stop] 0/4808, RunningAvgSamplesPerSec=12.007853566729846, CurrSamplesPerSec=12.017325405035356, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:21,536] [INFO] [timer.py:197:stop] 0/4809, RunningAvgSamplesPerSec=12.007831827771327, CurrSamplesPerSec=11.904255772524287, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:28,053] [INFO] [logging.py:68:log_dist] [Rank 0] step=4810, skipped=8, lr=[4.422222222222223e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:21:28,054] [INFO] [timer.py:197:stop] 0/4810, RunningAvgSamplesPerSec=12.00780872815964, CurrSamplesPerSec=11.897786515064093, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:34,478] [INFO] [timer.py:197:stop] 0/4811, RunningAvgSamplesPerSec=12.007797844437734, CurrSamplesPerSec=11.955696011974783, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:41,115] [INFO] [timer.py:197:stop] 0/4812, RunningAvgSamplesPerSec=12.007725695856406, CurrSamplesPerSec=11.670509049579982, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:47,565] [INFO] [timer.py:197:stop] 0/4813, RunningAvgSamplesPerSec=12.007704495267115, CurrSamplesPerSec=11.906588561364435, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:21:54,094] [INFO] [timer.py:197:stop] 0/4814, RunningAvgSamplesPerSec=12.007684027884652, CurrSamplesPerSec=11.910016536053877, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:00,594] [INFO] [timer.py:197:stop] 0/4815, RunningAvgSamplesPerSec=12.007647246815802, CurrSamplesPerSec=11.83322818256806, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:07,193] [INFO] [timer.py:197:stop] 0/4816, RunningAvgSamplesPerSec=12.007648405185572, CurrSamplesPerSec=12.013226229249657, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:13,843] [INFO] [timer.py:197:stop] 0/4817, RunningAvgSamplesPerSec=12.007632898472266, CurrSamplesPerSec=11.933444890081661, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:20,390] [INFO] [timer.py:197:stop] 0/4818, RunningAvgSamplesPerSec=12.007622057247733, CurrSamplesPerSec=11.955647555760452, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:26,910] [INFO] [timer.py:197:stop] 0/4819, RunningAvgSamplesPerSec=12.007609124238929, CurrSamplesPerSec=11.94564523707839, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:33,369] [INFO] [logging.py:68:log_dist] [Rank 0] step=4820, skipped=8, lr=[4.2000000000000006e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:22:33,370] [INFO] [timer.py:197:stop] 0/4820, RunningAvgSamplesPerSec=12.007607879302126, CurrSamplesPerSec=12.001614012814546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:39,846] [INFO] [timer.py:197:stop] 0/4821, RunningAvgSamplesPerSec=12.007588699444542, CurrSamplesPerSec=11.91588602175969, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:46,301] [INFO] [timer.py:197:stop] 0/4822, RunningAvgSamplesPerSec=12.007565349670852, CurrSamplesPerSec=11.896087661344861, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:52,723] [INFO] [timer.py:197:stop] 0/4823, RunningAvgSamplesPerSec=12.007569812666915, CurrSamplesPerSec=12.029120069132313, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:22:59,298] [INFO] [timer.py:197:stop] 0/4824, RunningAvgSamplesPerSec=12.007566845935644, CurrSamplesPerSec=11.993281254042003, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:05,773] [INFO] [timer.py:197:stop] 0/4825, RunningAvgSamplesPerSec=12.007535985876094, CurrSamplesPerSec=11.860550718144555, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 4.0888888888888897e-07, 'epoch': 126.97} +[2022-12-20 03:23:10,386] [INFO] [timer.py:197:stop] 0/4826, RunningAvgSamplesPerSec=12.008232026516497, CurrSamplesPerSec=16.668255613057443, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:16,855] [INFO] [timer.py:197:stop] 0/4827, RunningAvgSamplesPerSec=12.008228931953479, CurrSamplesPerSec=11.993319298892748, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:23,323] [INFO] [timer.py:197:stop] 0/4828, RunningAvgSamplesPerSec=12.008233524792338, CurrSamplesPerSec=12.03043495213335, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:29,820] [INFO] [timer.py:197:stop] 0/4829, RunningAvgSamplesPerSec=12.008214911708682, CurrSamplesPerSec=11.919055261678842, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:36,278] [INFO] [logging.py:68:log_dist] [Rank 0] step=4830, skipped=8, lr=[3.9777777777777783e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:23:36,279] [INFO] [timer.py:197:stop] 0/4830, RunningAvgSamplesPerSec=12.008210175823477, CurrSamplesPerSec=11.985393503194173, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:42,735] [INFO] [timer.py:197:stop] 0/4831, RunningAvgSamplesPerSec=12.008175531893828, CurrSamplesPerSec=11.843212866912973, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:49,281] [INFO] [timer.py:197:stop] 0/4832, RunningAvgSamplesPerSec=12.008175791295383, CurrSamplesPerSec=12.009428572106042, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:23:55,881] [INFO] [timer.py:197:stop] 0/4833, RunningAvgSamplesPerSec=12.008177397024902, CurrSamplesPerSec=12.015938084015408, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:02,388] [INFO] [timer.py:197:stop] 0/4834, RunningAvgSamplesPerSec=12.008157806460495, CurrSamplesPerSec=11.914256027688554, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:08,826] [INFO] [timer.py:197:stop] 0/4835, RunningAvgSamplesPerSec=12.008139282067896, CurrSamplesPerSec=11.919291831821115, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:15,259] [INFO] [timer.py:197:stop] 0/4836, RunningAvgSamplesPerSec=12.008140390399683, CurrSamplesPerSec=12.013499348944494, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:21,805] [INFO] [timer.py:197:stop] 0/4837, RunningAvgSamplesPerSec=12.008121455034622, CurrSamplesPerSec=11.91728049128937, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:28,223] [INFO] [timer.py:197:stop] 0/4838, RunningAvgSamplesPerSec=12.008124862552629, CurrSamplesPerSec=12.024622852321613, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:34,720] [INFO] [timer.py:197:stop] 0/4839, RunningAvgSamplesPerSec=12.008106035448092, CurrSamplesPerSec=11.917743446917129, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:41,141] [INFO] [logging.py:68:log_dist] [Rank 0] step=4840, skipped=8, lr=[3.755555555555556e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:24:41,142] [INFO] [timer.py:197:stop] 0/4840, RunningAvgSamplesPerSec=12.008107536449549, CurrSamplesPerSec=12.015372273812964, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:47,570] [INFO] [timer.py:197:stop] 0/4841, RunningAvgSamplesPerSec=12.00811122568446, CurrSamplesPerSec=12.025986318708188, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:24:54,088] [INFO] [timer.py:197:stop] 0/4842, RunningAvgSamplesPerSec=12.008096338193239, CurrSamplesPerSec=11.93648547438439, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:00,591] [INFO] [timer.py:197:stop] 0/4843, RunningAvgSamplesPerSec=12.008073885880679, CurrSamplesPerSec=11.900379493938923, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:06,978] [INFO] [timer.py:197:stop] 0/4844, RunningAvgSamplesPerSec=12.008077775898142, CurrSamplesPerSec=12.02693893541419, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:13,451] [INFO] [timer.py:197:stop] 0/4845, RunningAvgSamplesPerSec=12.008061500541713, CurrSamplesPerSec=11.929770133383345, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:19,920] [INFO] [timer.py:197:stop] 0/4846, RunningAvgSamplesPerSec=12.008046774014947, CurrSamplesPerSec=11.937147391490809, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:26,395] [INFO] [timer.py:197:stop] 0/4847, RunningAvgSamplesPerSec=12.00803323967971, CurrSamplesPerSec=11.942828989265598, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:32,935] [INFO] [timer.py:197:stop] 0/4848, RunningAvgSamplesPerSec=12.008017658428214, CurrSamplesPerSec=11.93299821922192, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:39,451] [INFO] [timer.py:197:stop] 0/4849, RunningAvgSamplesPerSec=12.007998877879832, CurrSamplesPerSec=11.917673075343576, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:45,960] [INFO] [logging.py:68:log_dist] [Rank 0] step=4850, skipped=8, lr=[3.533333333333334e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:25:45,960] [INFO] [timer.py:197:stop] 0/4850, RunningAvgSamplesPerSec=12.007979713905222, CurrSamplesPerSec=11.91580509329833, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 3.533333333333334e-07, 'epoch': 127.63} +[2022-12-20 03:25:52,438] [INFO] [timer.py:197:stop] 0/4851, RunningAvgSamplesPerSec=12.007976437906116, CurrSamplesPerSec=11.992115376788824, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:25:58,893] [INFO] [timer.py:197:stop] 0/4852, RunningAvgSamplesPerSec=12.007979040180174, CurrSamplesPerSec=12.020610743698992, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:05,301] [INFO] [timer.py:197:stop] 0/4853, RunningAvgSamplesPerSec=12.007959814034033, CurrSamplesPerSec=11.915431672937792, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:11,812] [INFO] [timer.py:197:stop] 0/4854, RunningAvgSamplesPerSec=12.007941026415292, CurrSamplesPerSec=11.917488949143284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:18,338] [INFO] [timer.py:197:stop] 0/4855, RunningAvgSamplesPerSec=12.007916923465928, CurrSamplesPerSec=11.892097632921272, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:24,929] [INFO] [timer.py:197:stop] 0/4856, RunningAvgSamplesPerSec=12.007867246366436, CurrSamplesPerSec=11.771530218710174, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:31,458] [INFO] [timer.py:197:stop] 0/4857, RunningAvgSamplesPerSec=12.007835731372984, CurrSamplesPerSec=11.856786638936757, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:37,998] [INFO] [timer.py:197:stop] 0/4858, RunningAvgSamplesPerSec=12.00781633741532, CurrSamplesPerSec=11.914391402815951, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:44,472] [INFO] [timer.py:197:stop] 0/4859, RunningAvgSamplesPerSec=12.007792551877213, CurrSamplesPerSec=11.893390633827345, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:50,952] [INFO] [logging.py:68:log_dist] [Rank 0] step=4860, skipped=8, lr=[3.3111111111111115e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:26:50,953] [INFO] [timer.py:197:stop] 0/4860, RunningAvgSamplesPerSec=12.007787183520776, CurrSamplesPerSec=11.981769583421135, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:26:57,358] [INFO] [timer.py:197:stop] 0/4861, RunningAvgSamplesPerSec=12.007761012749924, CurrSamplesPerSec=11.88195570280231, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:03,816] [INFO] [timer.py:197:stop] 0/4862, RunningAvgSamplesPerSec=12.007744963361064, CurrSamplesPerSec=11.930264282671162, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:10,307] [INFO] [timer.py:197:stop] 0/4863, RunningAvgSamplesPerSec=12.007713363227992, CurrSamplesPerSec=11.85607652537053, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:14,968] [INFO] [timer.py:197:stop] 0/4864, RunningAvgSamplesPerSec=12.008394703212264, CurrSamplesPerSec=16.582107810358114, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:21,478] [INFO] [timer.py:197:stop] 0/4865, RunningAvgSamplesPerSec=12.008381177794822, CurrSamplesPerSec=11.94297882972843, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:27,934] [INFO] [timer.py:197:stop] 0/4866, RunningAvgSamplesPerSec=12.008364423895229, CurrSamplesPerSec=11.927439382375324, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:34,425] [INFO] [timer.py:197:stop] 0/4867, RunningAvgSamplesPerSec=12.00836591740237, CurrSamplesPerSec=12.015634734276928, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:40,890] [INFO] [timer.py:197:stop] 0/4868, RunningAvgSamplesPerSec=12.008349845770836, CurrSamplesPerSec=11.930667267328717, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:47,408] [INFO] [timer.py:197:stop] 0/4869, RunningAvgSamplesPerSec=12.008328641805024, CurrSamplesPerSec=11.906029305720908, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:27:53,861] [INFO] [logging.py:68:log_dist] [Rank 0] step=4870, skipped=8, lr=[3.088888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:27:53,861] [INFO] [timer.py:197:stop] 0/4870, RunningAvgSamplesPerSec=12.008328611170365, CurrSamplesPerSec=12.008179514139151, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:00,326] [INFO] [timer.py:197:stop] 0/4871, RunningAvgSamplesPerSec=12.008306063321829, CurrSamplesPerSec=11.899537548588162, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:06,755] [INFO] [timer.py:197:stop] 0/4872, RunningAvgSamplesPerSec=12.00829043936644, CurrSamplesPerSec=11.932696390507166, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:13,251] [INFO] [timer.py:197:stop] 0/4873, RunningAvgSamplesPerSec=12.008282719361015, CurrSamplesPerSec=11.970803659264242, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:19,675] [INFO] [timer.py:197:stop] 0/4874, RunningAvgSamplesPerSec=12.008281095340413, CurrSamplesPerSec=12.000375699838182, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:26,277] [INFO] [timer.py:197:stop] 0/4875, RunningAvgSamplesPerSec=12.008222075109458, CurrSamplesPerSec=11.72740136849959, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.977777777777778e-07, 'epoch': 128.29} +[2022-12-20 03:28:32,766] [INFO] [timer.py:197:stop] 0/4876, RunningAvgSamplesPerSec=12.008204105355416, CurrSamplesPerSec=11.921271556321168, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:39,293] [INFO] [timer.py:197:stop] 0/4877, RunningAvgSamplesPerSec=12.00817659136585, CurrSamplesPerSec=11.875554784798378, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:45,727] [INFO] [timer.py:197:stop] 0/4878, RunningAvgSamplesPerSec=12.008175816171033, CurrSamplesPerSec=12.00439793061498, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:52,244] [INFO] [timer.py:197:stop] 0/4879, RunningAvgSamplesPerSec=12.008155371896537, CurrSamplesPerSec=11.909289993502663, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:28:58,801] [INFO] [logging.py:68:log_dist] [Rank 0] step=4880, skipped=8, lr=[2.866666666666667e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:28:58,802] [INFO] [timer.py:197:stop] 0/4880, RunningAvgSamplesPerSec=12.008133710319106, CurrSamplesPerSec=11.903411695091547, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:05,322] [INFO] [timer.py:197:stop] 0/4881, RunningAvgSamplesPerSec=12.0081148219262, CurrSamplesPerSec=11.916678967251618, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:11,725] [INFO] [timer.py:197:stop] 0/4882, RunningAvgSamplesPerSec=12.008115647098359, CurrSamplesPerSec=12.012143012612695, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:18,262] [INFO] [timer.py:197:stop] 0/4883, RunningAvgSamplesPerSec=12.008090492924715, CurrSamplesPerSec=11.886580512444102, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:24,795] [INFO] [timer.py:197:stop] 0/4884, RunningAvgSamplesPerSec=12.008068384650926, CurrSamplesPerSec=11.901119195867258, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:31,320] [INFO] [timer.py:197:stop] 0/4885, RunningAvgSamplesPerSec=12.008045494787465, CurrSamplesPerSec=11.897327745105322, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:37,825] [INFO] [timer.py:197:stop] 0/4886, RunningAvgSamplesPerSec=12.008024669096367, CurrSamplesPerSec=11.907186954427669, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:44,307] [INFO] [timer.py:197:stop] 0/4887, RunningAvgSamplesPerSec=12.008024274805106, CurrSamplesPerSec=12.006098865126255, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:50,759] [INFO] [timer.py:197:stop] 0/4888, RunningAvgSamplesPerSec=12.008010013820456, CurrSamplesPerSec=11.938747017875645, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:29:57,235] [INFO] [timer.py:197:stop] 0/4889, RunningAvgSamplesPerSec=12.007993126780523, CurrSamplesPerSec=11.926046243489523, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:03,748] [INFO] [logging.py:68:log_dist] [Rank 0] step=4890, skipped=8, lr=[2.6444444444444447e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:30:03,749] [INFO] [timer.py:197:stop] 0/4890, RunningAvgSamplesPerSec=12.007968774443079, CurrSamplesPerSec=11.890127058726812, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:10,262] [INFO] [timer.py:197:stop] 0/4891, RunningAvgSamplesPerSec=12.007942026023347, CurrSamplesPerSec=11.878604308514047, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:16,778] [INFO] [timer.py:197:stop] 0/4892, RunningAvgSamplesPerSec=12.007916276622876, CurrSamplesPerSec=11.88333382471777, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:23,246] [INFO] [timer.py:197:stop] 0/4893, RunningAvgSamplesPerSec=12.007913833042332, CurrSamplesPerSec=11.995976605393972, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:29,660] [INFO] [timer.py:197:stop] 0/4894, RunningAvgSamplesPerSec=12.007902064031938, CurrSamplesPerSec=11.95061450945089, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:36,152] [INFO] [timer.py:197:stop] 0/4895, RunningAvgSamplesPerSec=12.007869817149109, CurrSamplesPerSec=11.85216404778299, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:43,166] [INFO] [timer.py:197:stop] 0/4896, RunningAvgSamplesPerSec=12.007815815545746, CurrSamplesPerSec=11.749276250014335, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:50,179] [INFO] [timer.py:197:stop] 0/4897, RunningAvgSamplesPerSec=12.007804147923627, CurrSamplesPerSec=11.95097311212588, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:30:57,174] [INFO] [timer.py:197:stop] 0/4898, RunningAvgSamplesPerSec=12.007792873901577, CurrSamplesPerSec=11.95285905660138, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:04,004] [INFO] [timer.py:197:stop] 0/4899, RunningAvgSamplesPerSec=12.007778434767992, CurrSamplesPerSec=11.937498285100693, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:10,487] [INFO] [logging.py:68:log_dist] [Rank 0] step=4900, skipped=8, lr=[2.4222222222222224e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:31:10,488] [INFO] [timer.py:197:stop] 0/4900, RunningAvgSamplesPerSec=12.007749121003041, CurrSamplesPerSec=11.865895780007767, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2.4222222222222224e-07, 'epoch': 128.95} +[2022-12-20 03:31:17,046] [INFO] [timer.py:197:stop] 0/4901, RunningAvgSamplesPerSec=12.007728328827085, CurrSamplesPerSec=11.90674488772877, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:21,781] [INFO] [timer.py:197:stop] 0/4902, RunningAvgSamplesPerSec=12.00841276460384, CurrSamplesPerSec=16.660789362634574, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:28,291] [INFO] [timer.py:197:stop] 0/4903, RunningAvgSamplesPerSec=12.00839103253242, CurrSamplesPerSec=11.902840070859545, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:34,804] [INFO] [timer.py:197:stop] 0/4904, RunningAvgSamplesPerSec=12.008375483262785, CurrSamplesPerSec=11.932649181421256, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:41,529] [INFO] [timer.py:197:stop] 0/4905, RunningAvgSamplesPerSec=12.008357497112145, CurrSamplesPerSec=11.920832148920022, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:47,956] [INFO] [timer.py:197:stop] 0/4906, RunningAvgSamplesPerSec=12.008343964938597, CurrSamplesPerSec=11.942360363576377, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:31:54,438] [INFO] [timer.py:197:stop] 0/4907, RunningAvgSamplesPerSec=12.008319706194541, CurrSamplesPerSec=11.890522069375665, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:00,878] [INFO] [timer.py:197:stop] 0/4908, RunningAvgSamplesPerSec=12.00831250488812, CurrSamplesPerSec=11.97309371397033, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:07,509] [INFO] [timer.py:197:stop] 0/4909, RunningAvgSamplesPerSec=12.008297275363606, CurrSamplesPerSec=11.934043332905205, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:14,127] [INFO] [logging.py:68:log_dist] [Rank 0] step=4910, skipped=8, lr=[2.2e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:32:14,127] [INFO] [timer.py:197:stop] 0/4910, RunningAvgSamplesPerSec=12.008300129813518, CurrSamplesPerSec=12.022323275820565, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:20,725] [INFO] [timer.py:197:stop] 0/4911, RunningAvgSamplesPerSec=12.008295576658654, CurrSamplesPerSec=11.985990210293302, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:27,205] [INFO] [timer.py:197:stop] 0/4912, RunningAvgSamplesPerSec=12.00827995655084, CurrSamplesPerSec=11.932087475007712, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:33,653] [INFO] [timer.py:197:stop] 0/4913, RunningAvgSamplesPerSec=12.008267326600874, CurrSamplesPerSec=11.946572939103401, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:40,141] [INFO] [timer.py:197:stop] 0/4914, RunningAvgSamplesPerSec=12.008250247885666, CurrSamplesPerSec=11.924958560245226, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:46,640] [INFO] [timer.py:197:stop] 0/4915, RunningAvgSamplesPerSec=12.008227771700211, CurrSamplesPerSec=11.898830744113273, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:53,124] [INFO] [timer.py:197:stop] 0/4916, RunningAvgSamplesPerSec=12.008212936165913, CurrSamplesPerSec=11.935765782554002, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:32:59,578] [INFO] [timer.py:197:stop] 0/4917, RunningAvgSamplesPerSec=12.008216759002758, CurrSamplesPerSec=12.027031618850447, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:06,049] [INFO] [timer.py:197:stop] 0/4918, RunningAvgSamplesPerSec=12.008219457853505, CurrSamplesPerSec=12.021498981442404, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:12,579] [INFO] [timer.py:197:stop] 0/4919, RunningAvgSamplesPerSec=12.008191156402466, CurrSamplesPerSec=11.870655074996579, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:19,095] [INFO] [logging.py:68:log_dist] [Rank 0] step=4920, skipped=8, lr=[1.9777777777777778e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:33:19,096] [INFO] [timer.py:197:stop] 0/4920, RunningAvgSamplesPerSec=12.008193697913676, CurrSamplesPerSec=12.020703329626395, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:25,657] [INFO] [timer.py:197:stop] 0/4921, RunningAvgSamplesPerSec=12.008194033671913, CurrSamplesPerSec=12.009845519829891, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:32,303] [INFO] [timer.py:197:stop] 0/4922, RunningAvgSamplesPerSec=12.00818348627541, CurrSamplesPerSec=11.956524088211635, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:38,865] [INFO] [timer.py:197:stop] 0/4923, RunningAvgSamplesPerSec=12.008165825894736, CurrSamplesPerSec=11.921901077101861, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:45,452] [INFO] [timer.py:197:stop] 0/4924, RunningAvgSamplesPerSec=12.008148438817217, CurrSamplesPerSec=11.923192093853508, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:33:51,891] [INFO] [timer.py:197:stop] 0/4925, RunningAvgSamplesPerSec=12.008150382130012, CurrSamplesPerSec=12.017722994227599, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.866666666666667e-07, 'epoch': 129.61} +[2022-12-20 03:33:58,364] [INFO] [timer.py:197:stop] 0/4926, RunningAvgSamplesPerSec=12.008135168154919, CurrSamplesPerSec=11.9337011308578, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:04,934] [INFO] [timer.py:197:stop] 0/4927, RunningAvgSamplesPerSec=12.00810935076479, CurrSamplesPerSec=11.88231650841311, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:11,526] [INFO] [timer.py:197:stop] 0/4928, RunningAvgSamplesPerSec=12.00809240839249, CurrSamplesPerSec=11.92522715134624, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:18,072] [INFO] [timer.py:197:stop] 0/4929, RunningAvgSamplesPerSec=12.008094497762688, CurrSamplesPerSec=12.018395566281338, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:24,680] [INFO] [logging.py:68:log_dist] [Rank 0] step=4930, skipped=8, lr=[1.7555555555555558e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:34:24,681] [INFO] [timer.py:197:stop] 0/4930, RunningAvgSamplesPerSec=12.008098823678125, CurrSamplesPerSec=12.029450514939464, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:31,300] [INFO] [timer.py:197:stop] 0/4931, RunningAvgSamplesPerSec=12.008081812265713, CurrSamplesPerSec=11.924830891005715, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:37,788] [INFO] [timer.py:197:stop] 0/4932, RunningAvgSamplesPerSec=12.008083049039204, CurrSamplesPerSec=12.014182202516514, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:44,233] [INFO] [timer.py:197:stop] 0/4933, RunningAvgSamplesPerSec=12.008088521627192, CurrSamplesPerSec=12.035129147810647, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:50,661] [INFO] [timer.py:197:stop] 0/4934, RunningAvgSamplesPerSec=12.008068189695962, CurrSamplesPerSec=11.90864172818948, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:34:57,351] [INFO] [timer.py:197:stop] 0/4935, RunningAvgSamplesPerSec=12.008043222075518, CurrSamplesPerSec=11.886153132234142, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:03,959] [INFO] [timer.py:197:stop] 0/4936, RunningAvgSamplesPerSec=12.00802450461002, CurrSamplesPerSec=11.916395949244635, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:10,385] [INFO] [timer.py:197:stop] 0/4937, RunningAvgSamplesPerSec=12.008029313856795, CurrSamplesPerSec=12.03180512986515, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:16,851] [INFO] [timer.py:197:stop] 0/4938, RunningAvgSamplesPerSec=12.008008314881836, CurrSamplesPerSec=11.905265233034521, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:23,371] [INFO] [timer.py:197:stop] 0/4939, RunningAvgSamplesPerSec=12.007992513045728, CurrSamplesPerSec=11.930498116505838, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:28,029] [INFO] [logging.py:68:log_dist] [Rank 0] step=4940, skipped=8, lr=[1.5333333333333333e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:35:28,030] [INFO] [timer.py:197:stop] 0/4940, RunningAvgSamplesPerSec=12.008651081118746, CurrSamplesPerSec=16.46747615337931, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:34,505] [INFO] [timer.py:197:stop] 0/4941, RunningAvgSamplesPerSec=12.00864952877018, CurrSamplesPerSec=12.000988922547425, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:41,009] [INFO] [timer.py:197:stop] 0/4942, RunningAvgSamplesPerSec=12.00863608002774, CurrSamplesPerSec=11.942578200875573, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:47,462] [INFO] [timer.py:197:stop] 0/4943, RunningAvgSamplesPerSec=12.00862520235678, CurrSamplesPerSec=11.955128939121114, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:35:54,136] [INFO] [timer.py:197:stop] 0/4944, RunningAvgSamplesPerSec=12.008601125897082, CurrSamplesPerSec=11.890806493294674, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:00,694] [INFO] [timer.py:197:stop] 0/4945, RunningAvgSamplesPerSec=12.008601521839658, CurrSamplesPerSec=12.010558589019146, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:07,162] [INFO] [timer.py:197:stop] 0/4946, RunningAvgSamplesPerSec=12.008585335923078, CurrSamplesPerSec=11.929107973897546, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:13,687] [INFO] [timer.py:197:stop] 0/4947, RunningAvgSamplesPerSec=12.008575117926801, CurrSamplesPerSec=11.958269015268167, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:20,199] [INFO] [timer.py:197:stop] 0/4948, RunningAvgSamplesPerSec=12.008547910479841, CurrSamplesPerSec=11.875498044761086, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:26,667] [INFO] [timer.py:197:stop] 0/4949, RunningAvgSamplesPerSec=12.008540719848433, CurrSamplesPerSec=11.97308089705068, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:33,135] [INFO] [logging.py:68:log_dist] [Rank 0] step=4950, skipped=8, lr=[1.3111111111111113e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:36:33,136] [INFO] [timer.py:197:stop] 0/4950, RunningAvgSamplesPerSec=12.008542975135665, CurrSamplesPerSec=12.019710258466205, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 1.3111111111111113e-07, 'epoch': 130.26} +[2022-12-20 03:36:39,765] [INFO] [timer.py:197:stop] 0/4951, RunningAvgSamplesPerSec=12.008526611752208, CurrSamplesPerSec=11.928102946727659, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:46,391] [INFO] [timer.py:197:stop] 0/4952, RunningAvgSamplesPerSec=12.008528960460607, CurrSamplesPerSec=12.02016398281648, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:53,041] [INFO] [timer.py:197:stop] 0/4953, RunningAvgSamplesPerSec=12.008514224242036, CurrSamplesPerSec=11.936010446809375, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:36:59,500] [INFO] [timer.py:197:stop] 0/4954, RunningAvgSamplesPerSec=12.008518553104436, CurrSamplesPerSec=12.029989078087217, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:05,982] [INFO] [timer.py:197:stop] 0/4955, RunningAvgSamplesPerSec=12.008507816400765, CurrSamplesPerSec=11.955574073459616, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:12,458] [INFO] [timer.py:197:stop] 0/4956, RunningAvgSamplesPerSec=12.008495633259239, CurrSamplesPerSec=11.94845430391108, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:18,969] [INFO] [timer.py:197:stop] 0/4957, RunningAvgSamplesPerSec=12.008467671467148, CurrSamplesPerSec=11.871524963969993, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:25,486] [INFO] [timer.py:197:stop] 0/4958, RunningAvgSamplesPerSec=12.00843664402419, CurrSamplesPerSec=11.856639477147093, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:32,117] [INFO] [timer.py:197:stop] 0/4959, RunningAvgSamplesPerSec=12.008418253593625, CurrSamplesPerSec=11.917961974469094, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:38,660] [INFO] [logging.py:68:log_dist] [Rank 0] step=4960, skipped=8, lr=[1.088888888888889e-07], mom=[[0.9, 0.999]] +[2022-12-20 03:37:38,661] [INFO] [timer.py:197:stop] 0/4960, RunningAvgSamplesPerSec=12.00842141614929, CurrSamplesPerSec=12.024118701256736, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:45,265] [INFO] [timer.py:197:stop] 0/4961, RunningAvgSamplesPerSec=12.008420475184472, CurrSamplesPerSec=12.003756983752352, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:51,913] [INFO] [timer.py:197:stop] 0/4962, RunningAvgSamplesPerSec=12.008424730661805, CurrSamplesPerSec=12.02956480058566, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:37:58,357] [INFO] [timer.py:197:stop] 0/4963, RunningAvgSamplesPerSec=12.008423865314539, CurrSamplesPerSec=12.004133276755004, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:04,799] [INFO] [timer.py:197:stop] 0/4964, RunningAvgSamplesPerSec=12.00840722106865, CurrSamplesPerSec=11.926399134161821, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:11,334] [INFO] [timer.py:197:stop] 0/4965, RunningAvgSamplesPerSec=12.008382745640192, CurrSamplesPerSec=11.888151872603833, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:17,806] [INFO] [timer.py:197:stop] 0/4966, RunningAvgSamplesPerSec=12.008367631754806, CurrSamplesPerSec=11.933823154142758, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:24,410] [INFO] [timer.py:197:stop] 0/4967, RunningAvgSamplesPerSec=12.00836810130194, CurrSamplesPerSec=12.010699385861837, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:30,960] [INFO] [timer.py:197:stop] 0/4968, RunningAvgSamplesPerSec=12.008349833284846, CurrSamplesPerSec=11.918329203464383, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:37,458] [INFO] [timer.py:197:stop] 0/4969, RunningAvgSamplesPerSec=12.008328084912334, CurrSamplesPerSec=11.90128856994535, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:43,960] [INFO] [logging.py:68:log_dist] [Rank 0] step=4970, skipped=8, lr=[8.666666666666668e-08], mom=[[0.9, 0.999]] +[2022-12-20 03:38:43,961] [INFO] [timer.py:197:stop] 0/4970, RunningAvgSamplesPerSec=12.00831246666597, CurrSamplesPerSec=11.931234673455371, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:50,472] [INFO] [timer.py:197:stop] 0/4971, RunningAvgSamplesPerSec=12.008295299518881, CurrSamplesPerSec=11.923610489863293, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:38:56,898] [INFO] [timer.py:197:stop] 0/4972, RunningAvgSamplesPerSec=12.008277537404123, CurrSamplesPerSec=11.920661688745472, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:03,372] [INFO] [timer.py:197:stop] 0/4973, RunningAvgSamplesPerSec=12.008264933199136, CurrSamplesPerSec=11.945947189599819, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:09,805] [INFO] [timer.py:197:stop] 0/4974, RunningAvgSamplesPerSec=12.0082443087022, CurrSamplesPerSec=11.906588033241784, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:16,278] [INFO] [timer.py:197:stop] 0/4975, RunningAvgSamplesPerSec=12.008215835506984, CurrSamplesPerSec=11.868296989042905, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 7.555555555555556e-08, 'epoch': 130.92} +[2022-12-20 03:39:22,858] [INFO] [timer.py:197:stop] 0/4976, RunningAvgSamplesPerSec=12.008162542120141, CurrSamplesPerSec=11.748858686113625, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:29,379] [INFO] [timer.py:197:stop] 0/4977, RunningAvgSamplesPerSec=12.008137926051889, CurrSamplesPerSec=11.886933703273394, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:34,000] [INFO] [timer.py:197:stop] 0/4978, RunningAvgSamplesPerSec=12.008788725109264, CurrSamplesPerSec=16.442007651873855, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:40,533] [INFO] [timer.py:197:stop] 0/4979, RunningAvgSamplesPerSec=12.008754640537806, CurrSamplesPerSec=11.841512323118716, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:47,031] [INFO] [logging.py:68:log_dist] [Rank 0] step=4980, skipped=8, lr=[6.444444444444445e-08], mom=[[0.9, 0.999]] +[2022-12-20 03:39:47,031] [INFO] [timer.py:197:stop] 0/4980, RunningAvgSamplesPerSec=12.00875388989061, CurrSamplesPerSec=12.005019080942184, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:53,496] [INFO] [timer.py:197:stop] 0/4981, RunningAvgSamplesPerSec=12.008748184326464, CurrSamplesPerSec=11.980412916178446, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:39:59,934] [INFO] [timer.py:197:stop] 0/4982, RunningAvgSamplesPerSec=12.008738119300284, CurrSamplesPerSec=11.958832656683786, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:06,394] [INFO] [timer.py:197:stop] 0/4983, RunningAvgSamplesPerSec=12.008721399193758, CurrSamplesPerSec=11.926028758458687, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:12,790] [INFO] [timer.py:197:stop] 0/4984, RunningAvgSamplesPerSec=12.00872402785835, CurrSamplesPerSec=12.021831700643261, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:19,271] [INFO] [timer.py:197:stop] 0/4985, RunningAvgSamplesPerSec=12.008705448971128, CurrSamplesPerSec=11.916853545810055, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:25,734] [INFO] [timer.py:197:stop] 0/4986, RunningAvgSamplesPerSec=12.008705350771514, CurrSamplesPerSec=12.008216042038594, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:32,183] [INFO] [timer.py:197:stop] 0/4987, RunningAvgSamplesPerSec=12.008687509492566, CurrSamplesPerSec=11.920420300068045, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:38,610] [INFO] [timer.py:197:stop] 0/4988, RunningAvgSamplesPerSec=12.008669093707226, CurrSamplesPerSec=11.917563022217992, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:45,102] [INFO] [timer.py:197:stop] 0/4989, RunningAvgSamplesPerSec=12.00864853574812, CurrSamplesPerSec=11.907014243459093, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:51,538] [INFO] [logging.py:68:log_dist] [Rank 0] step=4990, skipped=8, lr=[4.222222222222222e-08], mom=[[0.9, 0.999]] +[2022-12-20 03:40:51,539] [INFO] [timer.py:197:stop] 0/4990, RunningAvgSamplesPerSec=12.00865085939925, CurrSamplesPerSec=12.020250102812403, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:40:58,002] [INFO] [timer.py:197:stop] 0/4991, RunningAvgSamplesPerSec=12.008650174509231, CurrSamplesPerSec=12.00523491485884, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:04,484] [INFO] [timer.py:197:stop] 0/4992, RunningAvgSamplesPerSec=12.00864119794204, CurrSamplesPerSec=11.964023531249076, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:11,029] [INFO] [timer.py:197:stop] 0/4993, RunningAvgSamplesPerSec=12.008616216765558, CurrSamplesPerSec=11.885241104591671, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:17,569] [INFO] [timer.py:197:stop] 0/4994, RunningAvgSamplesPerSec=12.008588982897615, CurrSamplesPerSec=11.874186345250775, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:24,043] [INFO] [timer.py:197:stop] 0/4995, RunningAvgSamplesPerSec=12.008567272225012, CurrSamplesPerSec=11.901157185895284, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:30,553] [INFO] [timer.py:197:stop] 0/4996, RunningAvgSamplesPerSec=12.008544185573175, CurrSamplesPerSec=11.89436873814182, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:37,062] [INFO] [timer.py:197:stop] 0/4997, RunningAvgSamplesPerSec=12.008523161674619, CurrSamplesPerSec=11.904440017973153, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:43,507] [INFO] [timer.py:197:stop] 0/4998, RunningAvgSamplesPerSec=12.008524828049, CurrSamplesPerSec=12.016854142583346, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:49,927] [INFO] [timer.py:197:stop] 0/4999, RunningAvgSamplesPerSec=12.008525593184363, CurrSamplesPerSec=12.012349426920148, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +[2022-12-20 03:41:56,393] [INFO] [logging.py:68:log_dist] [Rank 0] step=5000, skipped=8, lr=[2e-08], mom=[[0.9, 0.999]] +[2022-12-20 03:41:56,394] [INFO] [timer.py:197:stop] 0/5000, RunningAvgSamplesPerSec=12.008528795640862, CurrSamplesPerSec=12.024552828812292, MemAllocated=1.52GB, MaxMemAllocated=26.06GB +{'loss': 0.0, 'learning_rate': 2e-08, 'epoch': 131.58} +{'eval_loss': 0.451171875, 'eval_wer': 17.988338192419825, 'eval_runtime': 166.9, 'eval_samples_per_second': 7.232, 'eval_steps_per_second': 0.228, 'epoch': 131.58} +[2022-12-20 03:44:45,126] [INFO] [logging.py:68:log_dist] [Rank 0] [Torch] Checkpoint global_step5000 is begin to save! +[2022-12-20 03:44:45,134] [INFO] [logging.py:68:log_dist] [Rank 0] Saving model checkpoint: ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt +[2022-12-20 03:44:45,134] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt... +[2022-12-20 03:44:46,917] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5000/mp_rank_00_model_states.pt. +[2022-12-20 03:44:46,918] [INFO] [torch_checkpoint_engine.py:15:save] [Torch] Saving ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt... +[2022-12-20 03:44:54,246] [INFO] [torch_checkpoint_engine.py:17:save] [Torch] Saved ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt. +[2022-12-20 03:44:54,246] [INFO] [engine.py:3269:_save_zero_checkpoint] zero checkpoint saved ./checkpoint-5000/global_step5000/zero_pp_rank_0_mp_rank_00_optim_states.pt +[2022-12-20 03:44:54,246] [INFO] [torch_checkpoint_engine.py:27:commit] [Torch] Checkpoint global_step5000 is ready now!