azherali commited on
Commit
62beafe
·
verified ·
1 Parent(s): d0175c9

Training in progress, step 64000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4431a569e16eb6669f33895a534eefcadcc6125a0fcf8243e551a17cbe6b776e
3
  size 3555504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926ed83f6e74d30dd04cd576ac59c6374f40022ad71666f1151acf89ef6a727f
3
  size 3555504
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5c6c8f3d8741838a0a0349b3de3a3bd22b9e28163b525970af04b332fd7c349
3
  size 7141515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b80956cda8d577cf7bfd206ddcea2443accc461272fcb58c947316dd066ab0
3
  size 7141515
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dc3fe5842c642250bc8fc58f8686955286eff74353ef4cfdb5c91b975ca0c5b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a9e9fcdb822872caeabe3003c6e6517d9f7eeb88433b860fc3482c1c47480d
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31d1175317fe37be2b1293cac47d5356f1e200c454ceace1ce4e9dd0eded0aa5
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da3ebf38e6de5f93a849d5bb335be74fb8ae94c32eaa092e9a693e9450f5852
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63d134ebc965a606838bb6f037c88484af3b1856e8a790a7b2291b1079013ce
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f4b19a41457d0a78e68093b636e25cec379bf5b743c5d43b46b713a83c2f2b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 52000,
3
  "best_metric": 0.9908199660129274,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
5
- "epoch": 1.92,
6
  "eval_steps": 4000,
7
- "global_step": 60000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4388,6 +4388,298 @@
4388
  "eval_samples_per_second": 130.728,
4389
  "eval_steps_per_second": 8.171,
4390
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4391
  }
4392
  ],
4393
  "logging_steps": 100,
@@ -4402,7 +4694,7 @@
4402
  "early_stopping_threshold": 0.0
4403
  },
4404
  "attributes": {
4405
- "early_stopping_patience_counter": 2
4406
  }
4407
  },
4408
  "TrainerControl": {
@@ -4411,12 +4703,12 @@
4411
  "should_evaluate": false,
4412
  "should_log": false,
4413
  "should_save": true,
4414
- "should_training_stop": false
4415
  },
4416
  "attributes": {}
4417
  }
4418
  },
4419
- "total_flos": 2.5479625112655552e+17,
4420
  "train_batch_size": 16,
4421
  "trial_name": null,
4422
  "trial_params": null
 
2
  "best_global_step": 52000,
3
  "best_metric": 0.9908199660129274,
4
  "best_model_checkpoint": "CodeGenDetect-CodeBert_Lora/checkpoint-52000",
5
+ "epoch": 2.048,
6
  "eval_steps": 4000,
7
+ "global_step": 64000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4388
  "eval_samples_per_second": 130.728,
4389
  "eval_steps_per_second": 8.171,
4390
  "step": 60000
4391
+ },
4392
+ {
4393
+ "epoch": 1.9232,
4394
+ "grad_norm": 0.016066577285528183,
4395
+ "learning_rate": 1.2346837881219905e-05,
4396
+ "loss": 0.0324,
4397
+ "step": 60100
4398
+ },
4399
+ {
4400
+ "epoch": 1.9264000000000001,
4401
+ "grad_norm": 1.0818510055541992,
4402
+ "learning_rate": 1.2333996789727128e-05,
4403
+ "loss": 0.0333,
4404
+ "step": 60200
4405
+ },
4406
+ {
4407
+ "epoch": 1.9296,
4408
+ "grad_norm": 1.9333336353302002,
4409
+ "learning_rate": 1.2321155698234351e-05,
4410
+ "loss": 0.0465,
4411
+ "step": 60300
4412
+ },
4413
+ {
4414
+ "epoch": 1.9327999999999999,
4415
+ "grad_norm": 0.015184523537755013,
4416
+ "learning_rate": 1.2308314606741574e-05,
4417
+ "loss": 0.031,
4418
+ "step": 60400
4419
+ },
4420
+ {
4421
+ "epoch": 1.936,
4422
+ "grad_norm": 3.1265370845794678,
4423
+ "learning_rate": 1.2295473515248797e-05,
4424
+ "loss": 0.0265,
4425
+ "step": 60500
4426
+ },
4427
+ {
4428
+ "epoch": 1.9392,
4429
+ "grad_norm": 0.06526759266853333,
4430
+ "learning_rate": 1.228263242375602e-05,
4431
+ "loss": 0.0396,
4432
+ "step": 60600
4433
+ },
4434
+ {
4435
+ "epoch": 1.9424000000000001,
4436
+ "grad_norm": 0.051179856061935425,
4437
+ "learning_rate": 1.2269791332263244e-05,
4438
+ "loss": 0.0457,
4439
+ "step": 60700
4440
+ },
4441
+ {
4442
+ "epoch": 1.9456,
4443
+ "grad_norm": 0.012131915427744389,
4444
+ "learning_rate": 1.2256950240770467e-05,
4445
+ "loss": 0.0377,
4446
+ "step": 60800
4447
+ },
4448
+ {
4449
+ "epoch": 1.9487999999999999,
4450
+ "grad_norm": 6.335480690002441,
4451
+ "learning_rate": 1.224410914927769e-05,
4452
+ "loss": 0.0378,
4453
+ "step": 60900
4454
+ },
4455
+ {
4456
+ "epoch": 1.952,
4457
+ "grad_norm": 0.6301658749580383,
4458
+ "learning_rate": 1.2231268057784913e-05,
4459
+ "loss": 0.0442,
4460
+ "step": 61000
4461
+ },
4462
+ {
4463
+ "epoch": 1.9552,
4464
+ "grad_norm": 0.3388197124004364,
4465
+ "learning_rate": 1.2218426966292136e-05,
4466
+ "loss": 0.0489,
4467
+ "step": 61100
4468
+ },
4469
+ {
4470
+ "epoch": 1.9584000000000001,
4471
+ "grad_norm": 0.007041617762297392,
4472
+ "learning_rate": 1.220558587479936e-05,
4473
+ "loss": 0.0384,
4474
+ "step": 61200
4475
+ },
4476
+ {
4477
+ "epoch": 1.9616,
4478
+ "grad_norm": 0.10959050804376602,
4479
+ "learning_rate": 1.2192744783306582e-05,
4480
+ "loss": 0.0463,
4481
+ "step": 61300
4482
+ },
4483
+ {
4484
+ "epoch": 1.9647999999999999,
4485
+ "grad_norm": 3.765045642852783,
4486
+ "learning_rate": 1.2179903691813806e-05,
4487
+ "loss": 0.043,
4488
+ "step": 61400
4489
+ },
4490
+ {
4491
+ "epoch": 1.968,
4492
+ "grad_norm": 2.442903757095337,
4493
+ "learning_rate": 1.2167062600321029e-05,
4494
+ "loss": 0.0431,
4495
+ "step": 61500
4496
+ },
4497
+ {
4498
+ "epoch": 1.9712,
4499
+ "grad_norm": 3.0291242599487305,
4500
+ "learning_rate": 1.2154221508828252e-05,
4501
+ "loss": 0.0439,
4502
+ "step": 61600
4503
+ },
4504
+ {
4505
+ "epoch": 1.9744000000000002,
4506
+ "grad_norm": 0.1264086663722992,
4507
+ "learning_rate": 1.2141380417335475e-05,
4508
+ "loss": 0.029,
4509
+ "step": 61700
4510
+ },
4511
+ {
4512
+ "epoch": 1.9776,
4513
+ "grad_norm": 0.09380912035703659,
4514
+ "learning_rate": 1.2128539325842698e-05,
4515
+ "loss": 0.05,
4516
+ "step": 61800
4517
+ },
4518
+ {
4519
+ "epoch": 1.9808,
4520
+ "grad_norm": 0.012730620801448822,
4521
+ "learning_rate": 1.211569823434992e-05,
4522
+ "loss": 0.0235,
4523
+ "step": 61900
4524
+ },
4525
+ {
4526
+ "epoch": 1.984,
4527
+ "grad_norm": 0.04120282083749771,
4528
+ "learning_rate": 1.2102857142857143e-05,
4529
+ "loss": 0.0342,
4530
+ "step": 62000
4531
+ },
4532
+ {
4533
+ "epoch": 1.9872,
4534
+ "grad_norm": 0.22701594233512878,
4535
+ "learning_rate": 1.2090016051364366e-05,
4536
+ "loss": 0.0499,
4537
+ "step": 62100
4538
+ },
4539
+ {
4540
+ "epoch": 1.9904,
4541
+ "grad_norm": 0.005121626891195774,
4542
+ "learning_rate": 1.2077174959871589e-05,
4543
+ "loss": 0.0223,
4544
+ "step": 62200
4545
+ },
4546
+ {
4547
+ "epoch": 1.9936,
4548
+ "grad_norm": 4.5423583984375,
4549
+ "learning_rate": 1.2064333868378812e-05,
4550
+ "loss": 0.0351,
4551
+ "step": 62300
4552
+ },
4553
+ {
4554
+ "epoch": 1.9968,
4555
+ "grad_norm": 0.6799706816673279,
4556
+ "learning_rate": 1.2051492776886035e-05,
4557
+ "loss": 0.0374,
4558
+ "step": 62400
4559
+ },
4560
+ {
4561
+ "epoch": 2.0,
4562
+ "grad_norm": 0.4670400023460388,
4563
+ "learning_rate": 1.2038651685393258e-05,
4564
+ "loss": 0.0389,
4565
+ "step": 62500
4566
+ },
4567
+ {
4568
+ "epoch": 2.0032,
4569
+ "grad_norm": 0.0014514782233163714,
4570
+ "learning_rate": 1.2025810593900481e-05,
4571
+ "loss": 0.0407,
4572
+ "step": 62600
4573
+ },
4574
+ {
4575
+ "epoch": 2.0064,
4576
+ "grad_norm": 10.322093963623047,
4577
+ "learning_rate": 1.2012969502407705e-05,
4578
+ "loss": 0.0364,
4579
+ "step": 62700
4580
+ },
4581
+ {
4582
+ "epoch": 2.0096,
4583
+ "grad_norm": 0.7346168160438538,
4584
+ "learning_rate": 1.2000128410914928e-05,
4585
+ "loss": 0.063,
4586
+ "step": 62800
4587
+ },
4588
+ {
4589
+ "epoch": 2.0128,
4590
+ "grad_norm": 15.471858978271484,
4591
+ "learning_rate": 1.1987287319422153e-05,
4592
+ "loss": 0.0365,
4593
+ "step": 62900
4594
+ },
4595
+ {
4596
+ "epoch": 2.016,
4597
+ "grad_norm": 0.22428256273269653,
4598
+ "learning_rate": 1.1974446227929376e-05,
4599
+ "loss": 0.0437,
4600
+ "step": 63000
4601
+ },
4602
+ {
4603
+ "epoch": 2.0192,
4604
+ "grad_norm": 5.6665849685668945,
4605
+ "learning_rate": 1.1961605136436599e-05,
4606
+ "loss": 0.0448,
4607
+ "step": 63100
4608
+ },
4609
+ {
4610
+ "epoch": 2.0224,
4611
+ "grad_norm": 0.011902675963938236,
4612
+ "learning_rate": 1.1948764044943822e-05,
4613
+ "loss": 0.0309,
4614
+ "step": 63200
4615
+ },
4616
+ {
4617
+ "epoch": 2.0256,
4618
+ "grad_norm": 0.01199142262339592,
4619
+ "learning_rate": 1.1935922953451045e-05,
4620
+ "loss": 0.0181,
4621
+ "step": 63300
4622
+ },
4623
+ {
4624
+ "epoch": 2.0288,
4625
+ "grad_norm": 0.2028602510690689,
4626
+ "learning_rate": 1.1923081861958268e-05,
4627
+ "loss": 0.0433,
4628
+ "step": 63400
4629
+ },
4630
+ {
4631
+ "epoch": 2.032,
4632
+ "grad_norm": 0.05186166614294052,
4633
+ "learning_rate": 1.1910240770465491e-05,
4634
+ "loss": 0.0349,
4635
+ "step": 63500
4636
+ },
4637
+ {
4638
+ "epoch": 2.0352,
4639
+ "grad_norm": 8.039327621459961,
4640
+ "learning_rate": 1.1897399678972715e-05,
4641
+ "loss": 0.0447,
4642
+ "step": 63600
4643
+ },
4644
+ {
4645
+ "epoch": 2.0384,
4646
+ "grad_norm": 0.013709252700209618,
4647
+ "learning_rate": 1.1884558587479938e-05,
4648
+ "loss": 0.0264,
4649
+ "step": 63700
4650
+ },
4651
+ {
4652
+ "epoch": 2.0416,
4653
+ "grad_norm": 0.12741540372371674,
4654
+ "learning_rate": 1.187171749598716e-05,
4655
+ "loss": 0.0285,
4656
+ "step": 63800
4657
+ },
4658
+ {
4659
+ "epoch": 2.0448,
4660
+ "grad_norm": 38.967525482177734,
4661
+ "learning_rate": 1.1858876404494384e-05,
4662
+ "loss": 0.0406,
4663
+ "step": 63900
4664
+ },
4665
+ {
4666
+ "epoch": 2.048,
4667
+ "grad_norm": 10.748799324035645,
4668
+ "learning_rate": 1.1846035313001607e-05,
4669
+ "loss": 0.0358,
4670
+ "step": 64000
4671
+ },
4672
+ {
4673
+ "epoch": 2.048,
4674
+ "eval_accuracy": 0.99072,
4675
+ "eval_f1": 0.9907211797405157,
4676
+ "eval_loss": 0.038402359932661057,
4677
+ "eval_precision": 0.990739513426471,
4678
+ "eval_recall": 0.99072,
4679
+ "eval_runtime": 764.7435,
4680
+ "eval_samples_per_second": 130.763,
4681
+ "eval_steps_per_second": 8.173,
4682
+ "step": 64000
4683
  }
4684
  ],
4685
  "logging_steps": 100,
 
4694
  "early_stopping_threshold": 0.0
4695
  },
4696
  "attributes": {
4697
+ "early_stopping_patience_counter": 3
4698
  }
4699
  },
4700
  "TrainerControl": {
 
4703
  "should_evaluate": false,
4704
  "should_log": false,
4705
  "should_save": true,
4706
+ "should_training_stop": true
4707
  },
4708
  "attributes": {}
4709
  }
4710
  },
4711
+ "total_flos": 2.7177787451207846e+17,
4712
  "train_batch_size": 16,
4713
  "trial_name": null,
4714
  "trial_params": null