versae committed on
Commit b270287 · 1 Parent(s): e438145

Step... (19000/50000 | Loss: 1.6823453903198242, Acc: 0.6625654101371765): 39%|██████████▍ | 19301/50000 [7:34:52<11:40:44, 1.37s/it]

Files changed (32)
  1. flax_model.msgpack +1 -1
  2. outputs/checkpoints/checkpoint-12000/training_state.json +0 -1
  3. outputs/checkpoints/checkpoint-13000/training_state.json +0 -1
  4. outputs/checkpoints/checkpoint-14000/training_state.json +0 -1
  5. outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/config.json +0 -0
  6. outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/data_collator.joblib +0 -0
  7. outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/flax_model.msgpack +1 -1
  8. outputs/checkpoints/{checkpoint-14000 → checkpoint-17000}/optimizer_state.msgpack +1 -1
  9. outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/training_args.joblib +0 -0
  10. outputs/checkpoints/checkpoint-17000/training_state.json +1 -0
  11. outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/config.json +0 -0
  12. outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/data_collator.joblib +0 -0
  13. outputs/checkpoints/{checkpoint-14000 → checkpoint-18000}/flax_model.msgpack +1 -1
  14. outputs/checkpoints/{checkpoint-12000 → checkpoint-18000}/optimizer_state.msgpack +1 -1
  15. outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/training_args.joblib +0 -0
  16. outputs/checkpoints/checkpoint-18000/training_state.json +1 -0
  17. outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/config.json +0 -0
  18. outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/data_collator.joblib +0 -0
  19. outputs/checkpoints/{checkpoint-13000 → checkpoint-19000}/flax_model.msgpack +1 -1
  20. outputs/checkpoints/{checkpoint-13000 → checkpoint-19000}/optimizer_state.msgpack +1 -1
  21. outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/training_args.joblib +0 -0
  22. outputs/checkpoints/checkpoint-19000/training_state.json +1 -0
  23. outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 +2 -2
  24. outputs/flax_model.msgpack +1 -1
  25. outputs/optimizer_state.msgpack +1 -1
  26. outputs/training_state.json +1 -1
  27. pytorch_model.bin +1 -1
  28. run_stream.512.log +0 -0
  29. wandb/run-20210726_001233-17u6inbn/files/output.log +1722 -0
  30. wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json +1 -1
  31. wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log +2 -2
  32. wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb +2 -2
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e1c97577a048157d950d83272279ccc645289bcda0b3c1a4a714df7b8ef89ffa
+ oid sha256:2d0ae4178820ed8ec84d010dda13f1c110189fa19d49afd4d14283cf09774bee
  size 249750019
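flax_model.msgpack is tracked with Git LFS, so the diff above only swaps the pointer's sha256 oid; the blob size is unchanged. A minimal sketch (hypothetical local path, not part of this commit) of verifying a downloaded blob against the new pointer:

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256 digest."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            digest.update(block)
    return digest.hexdigest()

# oid taken from the "+" side of the pointer diff above; the path is hypothetical
expected = "2d0ae4178820ed8ec84d010dda13f1c110189fa19d49afd4d14283cf09774bee"
print(sha256_of("flax_model.msgpack") == expected)
```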
outputs/checkpoints/checkpoint-12000/training_state.json DELETED
@@ -1 +0,0 @@
- {"step": 12001}
outputs/checkpoints/checkpoint-13000/training_state.json DELETED
@@ -1 +0,0 @@
- {"step": 13001}
outputs/checkpoints/checkpoint-14000/training_state.json DELETED
@@ -1 +0,0 @@
- {"step": 14001}
outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a4df1917f93cb5be75e1a67299b85e14508ce6d594537be9e03fa1ea0d5c451b
+ oid sha256:6453368e8fd0e3c80ecb0b3dd860a84293d6cc3788ee6f32b9a7cb9a77fa001a
  size 249750019
outputs/checkpoints/{checkpoint-14000 → checkpoint-17000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c3b657f7303349384c5ab4bd1d5226d2f8dbc1b641fc9355b1d5d4d2825ce382
+ oid sha256:77b05dc72072a294b97d7184e57ba9c0046b55665a7eb760f5ff414d319abe87
  size 499500278
outputs/checkpoints/{checkpoint-12000 → checkpoint-17000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-17000/training_state.json ADDED
@@ -0,0 +1 @@
+ {"step": 17001}
outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-14000 → checkpoint-18000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:40b18e55e7e0e173646f5693cf8c145dd0ec756f12776cb671210c598dafdb45
+ oid sha256:e5a36a0b75be789eed389d6d8014081085f305abe5ca5007d4fd9bf9decf73d2
  size 249750019
outputs/checkpoints/{checkpoint-12000 → checkpoint-18000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d5ef9d9909e0225cdfdb08ba23fd64c8a8a881103ca5b932bc2206768a7e920b
+ oid sha256:472de67734e639ea41e43bd17705bf1c8e3ce22ee74865cab8ef338731f0cf9f
  size 499500278
outputs/checkpoints/{checkpoint-13000 → checkpoint-18000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-18000/training_state.json ADDED
@@ -0,0 +1 @@
+ {"step": 18001}
outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/config.json RENAMED
File without changes
outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/data_collator.joblib RENAMED
File without changes
outputs/checkpoints/{checkpoint-13000 → checkpoint-19000}/flax_model.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7781249560c15a41eb883214ab5f6613f40b42c1ae0886c52a020bbfa19f76fb
+ oid sha256:2d0ae4178820ed8ec84d010dda13f1c110189fa19d49afd4d14283cf09774bee
  size 249750019
outputs/checkpoints/{checkpoint-13000 → checkpoint-19000}/optimizer_state.msgpack RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:05c37a1e738b919e689e3c653244d8a680235541f5d91c99fb41edd65340a91d
+ oid sha256:6fd17bbca5658a6226151a6f85c1c6b4064b42b9ce32213f96be1f4b4993a48c
  size 499500278
outputs/checkpoints/{checkpoint-14000 → checkpoint-19000}/training_args.joblib RENAMED
File without changes
outputs/checkpoints/checkpoint-19000/training_state.json ADDED
@@ -0,0 +1 @@
+ {"step": 19001}
outputs/events.out.tfevents.1627258355.tablespoon.3000110.3.v2 CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a36b6088bbbd059f09c03e927f62dc0250503de8e49e98bf4f6dc280b68fb259
- size 2430179
+ oid sha256:2d64128316db1be6b780146438299ca6d9e56a42d407935030bc80dd716161f5
+ size 2803455
outputs/flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e1c97577a048157d950d83272279ccc645289bcda0b3c1a4a714df7b8ef89ffa
+ oid sha256:2d0ae4178820ed8ec84d010dda13f1c110189fa19d49afd4d14283cf09774bee
  size 249750019
outputs/optimizer_state.msgpack CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:33826e22d38c2cec4b568afa016f47cb58598d04aaa49474922cfec9ede1b96b
+ oid sha256:6fd17bbca5658a6226151a6f85c1c6b4064b42b9ce32213f96be1f4b4993a48c
  size 499500278
outputs/training_state.json CHANGED
@@ -1 +1 @@
- {"step": 16001}
+ {"step": 19001}
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cf0278c9ca522b31b01aa1a3b0b84a53e34020e5991c1693d5fede18cc38aaca
+ oid sha256:6e1c987c78ed3890e2c466fea8b433708a3e3c70c0d1d02e1b0cdab38c7ab56f
  size 498858859
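pytorch_model.bin moves in lockstep with the Flax weights, and the output log later in this commit shows the Transformers Flax-to-PyTorch conversion notices for RobertaForMaskedLM. A hedged sketch of how such a re-export is typically produced; the exact command used by the training script is not shown in this diff:

```python
from transformers import RobertaForMaskedLM

# Load the freshly saved Flax weights into the PyTorch class and re-serialize them.
# The log's "newly initialized" warning covers PyTorch-only tensors such as
# roberta.embeddings.position_ids and the tied lm_head.decoder weight/bias.
model = RobertaForMaskedLM.from_pretrained(".", from_flax=True)
model.save_pretrained(".")  # refreshes pytorch_model.bin next to flax_model.msgpack
```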
run_stream.512.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210726_001233-17u6inbn/files/output.log CHANGED
@@ -11188,6 +11188,1728 @@ You should probably TRAIN this model on a down-stream task to be able to use it
(the added hunk is mostly blank progress-bar lines; only the informative log lines are kept below)
+ Step... (16000/50000 | Loss: 1.6991859674453735, Acc: 0.659552812576294): 34%|█████████▌ | 17000/50000 [6:37:47<11:32:41, 1.26s/it]
+ Step... (16500 | Loss: 1.7835588455200195, Learning Rate: 0.0004060606297571212)
+ Step... (17000 | Loss: 1.692732572555542, Learning Rate: 0.00039999998989515007)
+ [08:46:11] - INFO - __main__ - Saving checkpoint at 17000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
+ All Flax model weights were used when initializing RobertaForMaskedLM.
+ Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+ Step... (17000/50000 | Loss: 1.6971577405929565, Acc: 0.6604305505752563): 36%|█████████▋ | 18000/50000 [7:02:23<12:10:56, 1.37s/it]
+ Step... (17500 | Loss: 1.9012951850891113, Learning Rate: 0.00039393940824083984)
+ Step... (17000/50000 | Loss: 1.6971577405929565, Acc: 0.6604305505752563): 36%|█████████▋ | 18000/50000 [7:02:27<12:10:56, 1.37s/it]
+ [09:10:50] - INFO - __main__ - Saving checkpoint at 18000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
+ All Flax model weights were used when initializing RobertaForMaskedLM.
+ Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+ Step... (18000/50000 | Loss: 1.6918002367019653, Acc: 0.6613297462463379): 38%|██████████▎ | 19000/50000 [7:26:48<11:49:37, 1.37s/it]
+ Evaluating ...: 3%|██▉ | 4/130 [00:00<00:08, 15.60it/s]
+ Step... (18500 | Loss: 1.7828737497329712, Learning Rate: 0.00038181818672455847)
+ [09:35:12] - INFO - __main__ - Saving checkpoint at 19000 steps█████████████████████████████████████████████████████| 130/130 [00:21<00:00, 4.60it/s]
+ All Flax model weights were used when initializing RobertaForMaskedLM.
+ Some weights of RobertaForMaskedLM were not initialized from the Flax model and are newly initialized: ['lm_head.decoder.weight', 'roberta.embeddings.position_ids', 'lm_head.decoder.bias']
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
wandb/run-20210726_001233-17u6inbn/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"global_step": 16500, "_timestamp": 1627288441.057746, "train_time": 616863.8125, "train_learning_rate": 0.0004060606297571212, "_step": 32901, "train_loss": 1.7374286651611328, "eval_accuracy": 0.659552812576294, "eval_loss": 1.6991859674453735}
+ {"global_step": 19000, "_timestamp": 1627292083.608254, "train_time": 778139.25, "train_learning_rate": 0.00037575760507024825, "_step": 37886, "train_loss": 1.8002192974090576, "eval_accuracy": 0.6613297462463379, "eval_loss": 1.6918002367019653}
wandb/run-20210726_001233-17u6inbn/logs/debug-internal.log CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f35d9a9ad3688b7e2e04122606d3ef735831cd7d3b8cb484a6915984c95ff551
- size 13058395
+ oid sha256:5735b88f7fe8d8de75852323f4e472b4ff32a198dfcd9c4aa9b6b77935cfe7a3
+ size 15022911
wandb/run-20210726_001233-17u6inbn/run-17u6inbn.wandb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cb882315e6f15ca6a3b1cd312b6599d77c0978d9421a510e09007e241cde953c
- size 6438750
+ oid sha256:65ba593b01d33851355b3d3d6b7ed6ce7fd160ce6e024522afb8bfc04d32aa8c
+ size 7433980