versae committed
Commit e438145 · 1 Parent(s): 821784a

Step... (16000/50000 | Loss: 1.6991859674453735, Acc: 0.659552812576294): 34%|█████████▍ | 16847/50000 [6:34:07<17:05:58, 1.86s/it]

Files changed (25)
  1. flax_model.msgpack +1 -1
  2. outputs/checkpoints/checkpoint-10000/training_state.json +0 -1
  3. outputs/checkpoints/checkpoint-11000/training_state.json +0 -1
  4. outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/config.json +0 -0
  5. outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/data_collator.joblib +0 -0
  6. outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/flax_model.msgpack +1 -1
  7. outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/optimizer_state.msgpack +1 -1
  8. outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/training_args.joblib +0 -0
  9. outputs/checkpoints/checkpoint-15000/training_state.json +1 -0
  10. outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/config.json +0 -0
  11. outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/data_collator.joblib +0 -0
  12. outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/flax_model.msgpack +1 -1
  13. outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/optimizer_state.msgpack +1 -1
  14. outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/training_args.joblib +0 -0
  15. outputs/checkpoints/checkpoint-16000/training_state.json +1 -0
  16. outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
  17. outputs/flax_model.msgpack +1 -1
  18. outputs/optimizer_state.msgpack +1 -1
  19. outputs/training_state.json +1 -1
  20. pytorch_model.bin +1 -1
  21. run_stream.512.log +0 -0
  22. wandb/run-20210726_001233-17u6inbn/files/output.log +1725 -0
  23. wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
  24. wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +2 -2
  25. wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +2 -2
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
+oid sha256:e1c97577a048157d950d83272279ccc645289bcda0b3c1a4a714df7b8ef89ffa
 size 249750019
outputs/checkpoints/checkpoint-10000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 10001}
outputs/checkpoints/checkpoint-11000/training_state.json DELETED
@@ -1 +0,0 @@
-{"step": 11001}
outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3471d2de3797fdad00939cfed3c119d97a102b86aab17beb39d8ebf37259b497
+oid sha256:4fae57c94dac9999e94e22d5044434007f1f445ea6c1ed7eba59fea02de5db30
 size 249750019
outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd556fdff51c3980e0d8c551f647a5d5ee9ec23fd987b892a16e1032647149a5
+oid sha256:a5e466347b293d39a54146314ea173df2523a33930d651d708c67ac132dd5088
 size 499500278
outputs/checkpoints/{checkpoint-10000 → checkpoint-15000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-15000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 15001}
outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:165a80d10b493e4117c19ffeb7cbc1d340e88d14e329eb9be3ab1d32f050f973
+oid sha256:e1c97577a048157d950d83272279ccc645289bcda0b3c1a4a714df7b8ef89ffa
 size 249750019
outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17907ad9f925f7ff5210c836be64cf4f0b87dea575a17582ec3bce13447deb03
+oid sha256:33826e22d38c2cec4b568afa016f47cb58598d04aaa49474922cfec9ede1b96b
 size 499500278
outputs/checkpoints/{checkpoint-11000 → checkpoint-16000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-16000/training_state.json ADDED
@@ -0,0 +1 @@
+{"step": 16001}
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4576e5515e6cf1926a9625e2db3778c06552a27d5a56f4b306bfdc6dec02245d
-size 2061819
+oid sha256:a36b6088bbbd059f09c03e927f62dc0250503de8e49e98bf4f6dc280b68fb259
+size 2430179
outputs/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
+oid sha256:e1c97577a048157d950d83272279ccc645289bcda0b3c1a4a714df7b8ef89ffa
 size 249750019
outputs/optimizer_state.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3b657f7303349384c5ab4bd1d5226d2f8dbc1b641fc9355b1d5d4d2825ce382
+oid sha256:33826e22d38c2cec4b568afa016f47cb58598d04aaa49474922cfec9ede1b96b
 size 499500278
outputs/training_state.json CHANGED
@@ -1 +1 @@
-{"step": 14001}
+{"step": 16001}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0290d7d4fc3d31d587881870f70299d2262836ee8bad199236e57b27fd504a0
+oid sha256:cf0278c9ca522b31b01aa1a3b0b84a53e34020e5991c1693d5fede18cc38aaca
 size 498858859
run_stream.512.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210726_001233-17u6inbn/files/output.log CHANGED
@@ -9463,6 +9463,1731 @@ You should probably TRAIN this model on a down-stream task to be able to use it
[1,725 added lines, mostly blank progress-bar output; the non-empty additions are:]
+Step... (14000/50000 | Loss: 1.7139594554901123, Acc: 0.6574689745903015): 30%|████████ | 15000/50000 [5:48:39<11:57:48, 1.23s/it]
+Evaluating ...: 2%|█▍ | 2/130 [00:00<00:08, 14.51it/s]
+Step... (14500 | Loss: 1.901540994644165, Learning Rate: 0.0004303030436858535)
+[07:57:02] - INFO - __main__ - Saving checkpoint at 15000 steps
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+Step... (15000/50000 | Loss: 1.709453821182251, Acc: 0.6586650609970093): 32%|████████▉ | 16000/50000 [6:13:24<13:18:57, 1.41s/it]
+Evaluating ...: 0%| | 0/130 [00:00<?, ?it/s]
+Step... (15500 | Loss: 1.768535852432251, Learning Rate: 0.0004181818221695721)
+[08:21:47] - INFO - __main__ - Saving checkpoint at 16000 steps
+All Flax model weights were used when initializing RobertaForMaskedLM.
+Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json CHANGED
@@ -1 +1 @@
-{"global_step": 14000, "_timestamp": 1627284744.890835, "train_time": 473944.03125, "train_learning_rate": 0.0004363636835478246, "_step": 27916, "train_loss": 1.866248369216919, "eval_accuracy": 0.6557744741439819, "eval_loss": 1.725870966911316}
+{"global_step": 16500, "_timestamp": 1627288441.057746, "train_time": 616863.8125, "train_learning_rate": 0.0004060606297571212, "_step": 32901, "train_loss": 1.7374286651611328, "eval_accuracy": 0.659552812576294, "eval_loss": 1.6991859674453735}
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e6271a5aebdb0a82b320da3c45ba34bbe503e95144d5ba49806fc88d8a7cba3
-size 11092265
+oid sha256:f35d9a9ad3688b7e2e04122606d3ef735831cd7d3b8cb484a6915984c95ff551
+size 13058395
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd71f592091748dfcbe49a2bde4aab4c1c7e018a8eb1ee1e528775a51a6409d6
-size 5451547
+oid sha256:cb882315e6f15ca6a3b1cd312b6599d77c0978d9421a510e09007e241cde953c
+size 6438750