Training in progress, step 10000, checkpoint
Browse files
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1856040378
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:685b8f929080a0c1cd4f7170b0965566ab4a1803a5e81ba420d0f9f65131466a
|
| 3 |
size 1856040378
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 928000378
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2024ef4bbfddb085322638248bf96be3bcf53d8f186db9d8857784f44d9ae05b
|
| 3 |
size 928000378
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:530f9198ae1e6fedb877735edfe54080dd4166eeb3eec9809a588fd9e5798b16
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1000
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54618d07bedadf28d2cb00e55d812e928e86078b77fbd6bce3af014f3dfa80f0
|
| 3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 1.
|
| 3 |
-
"best_model_checkpoint": "model/chessformer-3/checkpoint-
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -31651,6 +31651,3522 @@
|
|
| 31651 |
"eval_samples_per_second": 558.929,
|
| 31652 |
"eval_steps_per_second": 69.886,
|
| 31653 |
"step": 9000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31654 |
}
|
| 31655 |
],
|
| 31656 |
"logging_steps": 2,
|
|
@@ -31670,7 +35186,7 @@
|
|
| 31670 |
"attributes": {}
|
| 31671 |
}
|
| 31672 |
},
|
| 31673 |
-
"total_flos":
|
| 31674 |
"train_batch_size": 768,
|
| 31675 |
"trial_name": null,
|
| 31676 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 1.5108540058135986,
|
| 3 |
+
"best_model_checkpoint": "model/chessformer-3/checkpoint-10000",
|
| 4 |
+
"epoch": 0.44462229336178916,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 10000,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 31651 |
"eval_samples_per_second": 558.929,
|
| 31652 |
"eval_steps_per_second": 69.886,
|
| 31653 |
"step": 9000
|
| 31654 |
+
},
|
| 31655 |
+
{
|
| 31656 |
+
"epoch": 0.4002489884842826,
|
| 31657 |
+
"grad_norm": 0.07135413587093353,
|
| 31658 |
+
"learning_rate": 0.0009801698410686404,
|
| 31659 |
+
"loss": 1.5427,
|
| 31660 |
+
"step": 9002
|
| 31661 |
+
},
|
| 31662 |
+
{
|
| 31663 |
+
"epoch": 0.40033791294295495,
|
| 31664 |
+
"grad_norm": 0.07339084148406982,
|
| 31665 |
+
"learning_rate": 0.0009801599933182692,
|
| 31666 |
+
"loss": 1.5352,
|
| 31667 |
+
"step": 9004
|
| 31668 |
+
},
|
| 31669 |
+
{
|
| 31670 |
+
"epoch": 0.4004268374016273,
|
| 31671 |
+
"grad_norm": 0.06984297931194305,
|
| 31672 |
+
"learning_rate": 0.00098015014317278,
|
| 31673 |
+
"loss": 1.5358,
|
| 31674 |
+
"step": 9006
|
| 31675 |
+
},
|
| 31676 |
+
{
|
| 31677 |
+
"epoch": 0.4005157618602997,
|
| 31678 |
+
"grad_norm": 0.07473129779100418,
|
| 31679 |
+
"learning_rate": 0.0009801402906322225,
|
| 31680 |
+
"loss": 1.536,
|
| 31681 |
+
"step": 9008
|
| 31682 |
+
},
|
| 31683 |
+
{
|
| 31684 |
+
"epoch": 0.40060468631897206,
|
| 31685 |
+
"grad_norm": 0.07112877070903778,
|
| 31686 |
+
"learning_rate": 0.0009801304356966455,
|
| 31687 |
+
"loss": 1.5474,
|
| 31688 |
+
"step": 9010
|
| 31689 |
+
},
|
| 31690 |
+
{
|
| 31691 |
+
"epoch": 0.40069361077764437,
|
| 31692 |
+
"grad_norm": 0.07204887270927429,
|
| 31693 |
+
"learning_rate": 0.000980120578366098,
|
| 31694 |
+
"loss": 1.5418,
|
| 31695 |
+
"step": 9012
|
| 31696 |
+
},
|
| 31697 |
+
{
|
| 31698 |
+
"epoch": 0.40078253523631674,
|
| 31699 |
+
"grad_norm": 0.06817831844091415,
|
| 31700 |
+
"learning_rate": 0.0009801107186406296,
|
| 31701 |
+
"loss": 1.531,
|
| 31702 |
+
"step": 9014
|
| 31703 |
+
},
|
| 31704 |
+
{
|
| 31705 |
+
"epoch": 0.4008714596949891,
|
| 31706 |
+
"grad_norm": 0.07230226695537567,
|
| 31707 |
+
"learning_rate": 0.000980100856520289,
|
| 31708 |
+
"loss": 1.5388,
|
| 31709 |
+
"step": 9016
|
| 31710 |
+
},
|
| 31711 |
+
{
|
| 31712 |
+
"epoch": 0.4009603841536615,
|
| 31713 |
+
"grad_norm": 0.07334619015455246,
|
| 31714 |
+
"learning_rate": 0.0009800909920051257,
|
| 31715 |
+
"loss": 1.5435,
|
| 31716 |
+
"step": 9018
|
| 31717 |
+
},
|
| 31718 |
+
{
|
| 31719 |
+
"epoch": 0.40104930861233384,
|
| 31720 |
+
"grad_norm": 0.06880249083042145,
|
| 31721 |
+
"learning_rate": 0.0009800811250951888,
|
| 31722 |
+
"loss": 1.5336,
|
| 31723 |
+
"step": 9020
|
| 31724 |
+
},
|
| 31725 |
+
{
|
| 31726 |
+
"epoch": 0.40113823307100616,
|
| 31727 |
+
"grad_norm": 0.07309377938508987,
|
| 31728 |
+
"learning_rate": 0.0009800712557905278,
|
| 31729 |
+
"loss": 1.5394,
|
| 31730 |
+
"step": 9022
|
| 31731 |
+
},
|
| 31732 |
+
{
|
| 31733 |
+
"epoch": 0.4012271575296785,
|
| 31734 |
+
"grad_norm": 0.07203828543424606,
|
| 31735 |
+
"learning_rate": 0.0009800613840911913,
|
| 31736 |
+
"loss": 1.5421,
|
| 31737 |
+
"step": 9024
|
| 31738 |
+
},
|
| 31739 |
+
{
|
| 31740 |
+
"epoch": 0.4013160819883509,
|
| 31741 |
+
"grad_norm": 0.07213805615901947,
|
| 31742 |
+
"learning_rate": 0.0009800515099972294,
|
| 31743 |
+
"loss": 1.5456,
|
| 31744 |
+
"step": 9026
|
| 31745 |
+
},
|
| 31746 |
+
{
|
| 31747 |
+
"epoch": 0.40140500644702326,
|
| 31748 |
+
"grad_norm": 0.07065927237272263,
|
| 31749 |
+
"learning_rate": 0.0009800416335086907,
|
| 31750 |
+
"loss": 1.5389,
|
| 31751 |
+
"step": 9028
|
| 31752 |
+
},
|
| 31753 |
+
{
|
| 31754 |
+
"epoch": 0.40149393090569563,
|
| 31755 |
+
"grad_norm": 0.0714295282959938,
|
| 31756 |
+
"learning_rate": 0.0009800317546256245,
|
| 31757 |
+
"loss": 1.5386,
|
| 31758 |
+
"step": 9030
|
| 31759 |
+
},
|
| 31760 |
+
{
|
| 31761 |
+
"epoch": 0.401582855364368,
|
| 31762 |
+
"grad_norm": 0.07336141169071198,
|
| 31763 |
+
"learning_rate": 0.0009800218733480802,
|
| 31764 |
+
"loss": 1.54,
|
| 31765 |
+
"step": 9032
|
| 31766 |
+
},
|
| 31767 |
+
{
|
| 31768 |
+
"epoch": 0.4016717798230403,
|
| 31769 |
+
"grad_norm": 0.0692136138677597,
|
| 31770 |
+
"learning_rate": 0.0009800119896761074,
|
| 31771 |
+
"loss": 1.5393,
|
| 31772 |
+
"step": 9034
|
| 31773 |
+
},
|
| 31774 |
+
{
|
| 31775 |
+
"epoch": 0.4017607042817127,
|
| 31776 |
+
"grad_norm": 0.07176699489355087,
|
| 31777 |
+
"learning_rate": 0.0009800021036097549,
|
| 31778 |
+
"loss": 1.5424,
|
| 31779 |
+
"step": 9036
|
| 31780 |
+
},
|
| 31781 |
+
{
|
| 31782 |
+
"epoch": 0.40184962874038505,
|
| 31783 |
+
"grad_norm": 0.06777803599834442,
|
| 31784 |
+
"learning_rate": 0.0009799922151490722,
|
| 31785 |
+
"loss": 1.5379,
|
| 31786 |
+
"step": 9038
|
| 31787 |
+
},
|
| 31788 |
+
{
|
| 31789 |
+
"epoch": 0.4019385531990574,
|
| 31790 |
+
"grad_norm": 0.07473397254943848,
|
| 31791 |
+
"learning_rate": 0.0009799823242941088,
|
| 31792 |
+
"loss": 1.54,
|
| 31793 |
+
"step": 9040
|
| 31794 |
+
},
|
| 31795 |
+
{
|
| 31796 |
+
"epoch": 0.4020274776577298,
|
| 31797 |
+
"grad_norm": 0.07176635414361954,
|
| 31798 |
+
"learning_rate": 0.000979972431044914,
|
| 31799 |
+
"loss": 1.5363,
|
| 31800 |
+
"step": 9042
|
| 31801 |
+
},
|
| 31802 |
+
{
|
| 31803 |
+
"epoch": 0.4021164021164021,
|
| 31804 |
+
"grad_norm": 0.0704076886177063,
|
| 31805 |
+
"learning_rate": 0.0009799625354015367,
|
| 31806 |
+
"loss": 1.5381,
|
| 31807 |
+
"step": 9044
|
| 31808 |
+
},
|
| 31809 |
+
{
|
| 31810 |
+
"epoch": 0.40220532657507446,
|
| 31811 |
+
"grad_norm": 0.07321424782276154,
|
| 31812 |
+
"learning_rate": 0.0009799526373640267,
|
| 31813 |
+
"loss": 1.5393,
|
| 31814 |
+
"step": 9046
|
| 31815 |
+
},
|
| 31816 |
+
{
|
| 31817 |
+
"epoch": 0.40229425103374683,
|
| 31818 |
+
"grad_norm": 0.0706743597984314,
|
| 31819 |
+
"learning_rate": 0.0009799427369324334,
|
| 31820 |
+
"loss": 1.5404,
|
| 31821 |
+
"step": 9048
|
| 31822 |
+
},
|
| 31823 |
+
{
|
| 31824 |
+
"epoch": 0.4023831754924192,
|
| 31825 |
+
"grad_norm": 0.07315555214881897,
|
| 31826 |
+
"learning_rate": 0.000979932834106806,
|
| 31827 |
+
"loss": 1.5373,
|
| 31828 |
+
"step": 9050
|
| 31829 |
+
},
|
| 31830 |
+
{
|
| 31831 |
+
"epoch": 0.40247209995109157,
|
| 31832 |
+
"grad_norm": 0.07187056541442871,
|
| 31833 |
+
"learning_rate": 0.000979922928887194,
|
| 31834 |
+
"loss": 1.5369,
|
| 31835 |
+
"step": 9052
|
| 31836 |
+
},
|
| 31837 |
+
{
|
| 31838 |
+
"epoch": 0.4025610244097639,
|
| 31839 |
+
"grad_norm": 0.06762688606977463,
|
| 31840 |
+
"learning_rate": 0.0009799130212736467,
|
| 31841 |
+
"loss": 1.5424,
|
| 31842 |
+
"step": 9054
|
| 31843 |
+
},
|
| 31844 |
+
{
|
| 31845 |
+
"epoch": 0.40264994886843625,
|
| 31846 |
+
"grad_norm": 0.07067506015300751,
|
| 31847 |
+
"learning_rate": 0.0009799031112662138,
|
| 31848 |
+
"loss": 1.5421,
|
| 31849 |
+
"step": 9056
|
| 31850 |
+
},
|
| 31851 |
+
{
|
| 31852 |
+
"epoch": 0.4027388733271086,
|
| 31853 |
+
"grad_norm": 0.07152686268091202,
|
| 31854 |
+
"learning_rate": 0.0009798931988649442,
|
| 31855 |
+
"loss": 1.5434,
|
| 31856 |
+
"step": 9058
|
| 31857 |
+
},
|
| 31858 |
+
{
|
| 31859 |
+
"epoch": 0.402827797785781,
|
| 31860 |
+
"grad_norm": 0.07224275916814804,
|
| 31861 |
+
"learning_rate": 0.0009798832840698878,
|
| 31862 |
+
"loss": 1.5396,
|
| 31863 |
+
"step": 9060
|
| 31864 |
+
},
|
| 31865 |
+
{
|
| 31866 |
+
"epoch": 0.40291672224445335,
|
| 31867 |
+
"grad_norm": 0.06946686655282974,
|
| 31868 |
+
"learning_rate": 0.000979873366881094,
|
| 31869 |
+
"loss": 1.5462,
|
| 31870 |
+
"step": 9062
|
| 31871 |
+
},
|
| 31872 |
+
{
|
| 31873 |
+
"epoch": 0.4030056467031257,
|
| 31874 |
+
"grad_norm": 0.07184547185897827,
|
| 31875 |
+
"learning_rate": 0.000979863447298612,
|
| 31876 |
+
"loss": 1.5358,
|
| 31877 |
+
"step": 9064
|
| 31878 |
+
},
|
| 31879 |
+
{
|
| 31880 |
+
"epoch": 0.40309457116179803,
|
| 31881 |
+
"grad_norm": 0.06754238903522491,
|
| 31882 |
+
"learning_rate": 0.0009798535253224916,
|
| 31883 |
+
"loss": 1.5369,
|
| 31884 |
+
"step": 9066
|
| 31885 |
+
},
|
| 31886 |
+
{
|
| 31887 |
+
"epoch": 0.4031834956204704,
|
| 31888 |
+
"grad_norm": 0.06945636868476868,
|
| 31889 |
+
"learning_rate": 0.0009798436009527823,
|
| 31890 |
+
"loss": 1.5443,
|
| 31891 |
+
"step": 9068
|
| 31892 |
+
},
|
| 31893 |
+
{
|
| 31894 |
+
"epoch": 0.40327242007914277,
|
| 31895 |
+
"grad_norm": 0.07276376336812973,
|
| 31896 |
+
"learning_rate": 0.0009798336741895332,
|
| 31897 |
+
"loss": 1.5391,
|
| 31898 |
+
"step": 9070
|
| 31899 |
+
},
|
| 31900 |
+
{
|
| 31901 |
+
"epoch": 0.40336134453781514,
|
| 31902 |
+
"grad_norm": 0.07267561554908752,
|
| 31903 |
+
"learning_rate": 0.0009798237450327942,
|
| 31904 |
+
"loss": 1.5445,
|
| 31905 |
+
"step": 9072
|
| 31906 |
+
},
|
| 31907 |
+
{
|
| 31908 |
+
"epoch": 0.4034502689964875,
|
| 31909 |
+
"grad_norm": 0.07162649929523468,
|
| 31910 |
+
"learning_rate": 0.000979813813482615,
|
| 31911 |
+
"loss": 1.5416,
|
| 31912 |
+
"step": 9074
|
| 31913 |
+
},
|
| 31914 |
+
{
|
| 31915 |
+
"epoch": 0.4035391934551598,
|
| 31916 |
+
"grad_norm": 0.06982056051492691,
|
| 31917 |
+
"learning_rate": 0.0009798038795390445,
|
| 31918 |
+
"loss": 1.5402,
|
| 31919 |
+
"step": 9076
|
| 31920 |
+
},
|
| 31921 |
+
{
|
| 31922 |
+
"epoch": 0.4036281179138322,
|
| 31923 |
+
"grad_norm": 0.06865835189819336,
|
| 31924 |
+
"learning_rate": 0.0009797939432021326,
|
| 31925 |
+
"loss": 1.5426,
|
| 31926 |
+
"step": 9078
|
| 31927 |
+
},
|
| 31928 |
+
{
|
| 31929 |
+
"epoch": 0.40371704237250455,
|
| 31930 |
+
"grad_norm": 0.07144743204116821,
|
| 31931 |
+
"learning_rate": 0.000979784004471929,
|
| 31932 |
+
"loss": 1.5405,
|
| 31933 |
+
"step": 9080
|
| 31934 |
+
},
|
| 31935 |
+
{
|
| 31936 |
+
"epoch": 0.4038059668311769,
|
| 31937 |
+
"grad_norm": 0.0699494332075119,
|
| 31938 |
+
"learning_rate": 0.000979774063348483,
|
| 31939 |
+
"loss": 1.5377,
|
| 31940 |
+
"step": 9082
|
| 31941 |
+
},
|
| 31942 |
+
{
|
| 31943 |
+
"epoch": 0.4038948912898493,
|
| 31944 |
+
"grad_norm": 0.06973431259393692,
|
| 31945 |
+
"learning_rate": 0.0009797641198318445,
|
| 31946 |
+
"loss": 1.5392,
|
| 31947 |
+
"step": 9084
|
| 31948 |
+
},
|
| 31949 |
+
{
|
| 31950 |
+
"epoch": 0.40398381574852166,
|
| 31951 |
+
"grad_norm": 0.07053232192993164,
|
| 31952 |
+
"learning_rate": 0.000979754173922063,
|
| 31953 |
+
"loss": 1.537,
|
| 31954 |
+
"step": 9086
|
| 31955 |
+
},
|
| 31956 |
+
{
|
| 31957 |
+
"epoch": 0.40407274020719397,
|
| 31958 |
+
"grad_norm": 0.06677805632352829,
|
| 31959 |
+
"learning_rate": 0.0009797442256191877,
|
| 31960 |
+
"loss": 1.5489,
|
| 31961 |
+
"step": 9088
|
| 31962 |
+
},
|
| 31963 |
+
{
|
| 31964 |
+
"epoch": 0.40416166466586634,
|
| 31965 |
+
"grad_norm": 0.07138942182064056,
|
| 31966 |
+
"learning_rate": 0.000979734274923269,
|
| 31967 |
+
"loss": 1.5407,
|
| 31968 |
+
"step": 9090
|
| 31969 |
+
},
|
| 31970 |
+
{
|
| 31971 |
+
"epoch": 0.4042505891245387,
|
| 31972 |
+
"grad_norm": 0.07154888659715652,
|
| 31973 |
+
"learning_rate": 0.0009797243218343558,
|
| 31974 |
+
"loss": 1.5373,
|
| 31975 |
+
"step": 9092
|
| 31976 |
+
},
|
| 31977 |
+
{
|
| 31978 |
+
"epoch": 0.4043395135832111,
|
| 31979 |
+
"grad_norm": 0.07199876755475998,
|
| 31980 |
+
"learning_rate": 0.000979714366352498,
|
| 31981 |
+
"loss": 1.5344,
|
| 31982 |
+
"step": 9094
|
| 31983 |
+
},
|
| 31984 |
+
{
|
| 31985 |
+
"epoch": 0.40442843804188344,
|
| 31986 |
+
"grad_norm": 0.07113402336835861,
|
| 31987 |
+
"learning_rate": 0.0009797044084777456,
|
| 31988 |
+
"loss": 1.54,
|
| 31989 |
+
"step": 9096
|
| 31990 |
+
},
|
| 31991 |
+
{
|
| 31992 |
+
"epoch": 0.40451736250055575,
|
| 31993 |
+
"grad_norm": 0.07240010052919388,
|
| 31994 |
+
"learning_rate": 0.0009796944482101477,
|
| 31995 |
+
"loss": 1.5366,
|
| 31996 |
+
"step": 9098
|
| 31997 |
+
},
|
| 31998 |
+
{
|
| 31999 |
+
"epoch": 0.4046062869592281,
|
| 32000 |
+
"grad_norm": 0.07278742641210556,
|
| 32001 |
+
"learning_rate": 0.0009796844855497545,
|
| 32002 |
+
"loss": 1.5375,
|
| 32003 |
+
"step": 9100
|
| 32004 |
+
},
|
| 32005 |
+
{
|
| 32006 |
+
"epoch": 0.4046952114179005,
|
| 32007 |
+
"grad_norm": 0.06928924471139908,
|
| 32008 |
+
"learning_rate": 0.0009796745204966152,
|
| 32009 |
+
"loss": 1.5358,
|
| 32010 |
+
"step": 9102
|
| 32011 |
+
},
|
| 32012 |
+
{
|
| 32013 |
+
"epoch": 0.40478413587657286,
|
| 32014 |
+
"grad_norm": 0.06925825029611588,
|
| 32015 |
+
"learning_rate": 0.0009796645530507802,
|
| 32016 |
+
"loss": 1.544,
|
| 32017 |
+
"step": 9104
|
| 32018 |
+
},
|
| 32019 |
+
{
|
| 32020 |
+
"epoch": 0.4048730603352452,
|
| 32021 |
+
"grad_norm": 0.07367236167192459,
|
| 32022 |
+
"learning_rate": 0.0009796545832122985,
|
| 32023 |
+
"loss": 1.5375,
|
| 32024 |
+
"step": 9106
|
| 32025 |
+
},
|
| 32026 |
+
{
|
| 32027 |
+
"epoch": 0.40496198479391754,
|
| 32028 |
+
"grad_norm": 0.07212896645069122,
|
| 32029 |
+
"learning_rate": 0.00097964461098122,
|
| 32030 |
+
"loss": 1.5415,
|
| 32031 |
+
"step": 9108
|
| 32032 |
+
},
|
| 32033 |
+
{
|
| 32034 |
+
"epoch": 0.4050509092525899,
|
| 32035 |
+
"grad_norm": 0.07600012421607971,
|
| 32036 |
+
"learning_rate": 0.0009796346363575947,
|
| 32037 |
+
"loss": 1.5381,
|
| 32038 |
+
"step": 9110
|
| 32039 |
+
},
|
| 32040 |
+
{
|
| 32041 |
+
"epoch": 0.4051398337112623,
|
| 32042 |
+
"grad_norm": 0.07662640511989594,
|
| 32043 |
+
"learning_rate": 0.0009796246593414724,
|
| 32044 |
+
"loss": 1.5339,
|
| 32045 |
+
"step": 9112
|
| 32046 |
+
},
|
| 32047 |
+
{
|
| 32048 |
+
"epoch": 0.40522875816993464,
|
| 32049 |
+
"grad_norm": 0.070067398250103,
|
| 32050 |
+
"learning_rate": 0.0009796146799329025,
|
| 32051 |
+
"loss": 1.5423,
|
| 32052 |
+
"step": 9114
|
| 32053 |
+
},
|
| 32054 |
+
{
|
| 32055 |
+
"epoch": 0.405317682628607,
|
| 32056 |
+
"grad_norm": 0.07174143195152283,
|
| 32057 |
+
"learning_rate": 0.0009796046981319349,
|
| 32058 |
+
"loss": 1.5416,
|
| 32059 |
+
"step": 9116
|
| 32060 |
+
},
|
| 32061 |
+
{
|
| 32062 |
+
"epoch": 0.4054066070872794,
|
| 32063 |
+
"grad_norm": 0.06986463814973831,
|
| 32064 |
+
"learning_rate": 0.0009795947139386195,
|
| 32065 |
+
"loss": 1.5367,
|
| 32066 |
+
"step": 9118
|
| 32067 |
+
},
|
| 32068 |
+
{
|
| 32069 |
+
"epoch": 0.4054955315459517,
|
| 32070 |
+
"grad_norm": 0.0711868554353714,
|
| 32071 |
+
"learning_rate": 0.000979584727353006,
|
| 32072 |
+
"loss": 1.5423,
|
| 32073 |
+
"step": 9120
|
| 32074 |
+
},
|
| 32075 |
+
{
|
| 32076 |
+
"epoch": 0.40558445600462406,
|
| 32077 |
+
"grad_norm": 0.07185269147157669,
|
| 32078 |
+
"learning_rate": 0.0009795747383751446,
|
| 32079 |
+
"loss": 1.5374,
|
| 32080 |
+
"step": 9122
|
| 32081 |
+
},
|
| 32082 |
+
{
|
| 32083 |
+
"epoch": 0.40567338046329643,
|
| 32084 |
+
"grad_norm": 0.07167672365903854,
|
| 32085 |
+
"learning_rate": 0.0009795647470050846,
|
| 32086 |
+
"loss": 1.5397,
|
| 32087 |
+
"step": 9124
|
| 32088 |
+
},
|
| 32089 |
+
{
|
| 32090 |
+
"epoch": 0.4057623049219688,
|
| 32091 |
+
"grad_norm": 0.07179324328899384,
|
| 32092 |
+
"learning_rate": 0.000979554753242876,
|
| 32093 |
+
"loss": 1.5418,
|
| 32094 |
+
"step": 9126
|
| 32095 |
+
},
|
| 32096 |
+
{
|
| 32097 |
+
"epoch": 0.40585122938064117,
|
| 32098 |
+
"grad_norm": 0.07255962491035461,
|
| 32099 |
+
"learning_rate": 0.0009795447570885686,
|
| 32100 |
+
"loss": 1.5377,
|
| 32101 |
+
"step": 9128
|
| 32102 |
+
},
|
| 32103 |
+
{
|
| 32104 |
+
"epoch": 0.4059401538393135,
|
| 32105 |
+
"grad_norm": 0.0690963864326477,
|
| 32106 |
+
"learning_rate": 0.0009795347585422123,
|
| 32107 |
+
"loss": 1.5444,
|
| 32108 |
+
"step": 9130
|
| 32109 |
+
},
|
| 32110 |
+
{
|
| 32111 |
+
"epoch": 0.40602907829798585,
|
| 32112 |
+
"grad_norm": 0.07536199688911438,
|
| 32113 |
+
"learning_rate": 0.0009795247576038573,
|
| 32114 |
+
"loss": 1.5363,
|
| 32115 |
+
"step": 9132
|
| 32116 |
+
},
|
| 32117 |
+
{
|
| 32118 |
+
"epoch": 0.4061180027566582,
|
| 32119 |
+
"grad_norm": 0.06956053525209427,
|
| 32120 |
+
"learning_rate": 0.000979514754273553,
|
| 32121 |
+
"loss": 1.541,
|
| 32122 |
+
"step": 9134
|
| 32123 |
+
},
|
| 32124 |
+
{
|
| 32125 |
+
"epoch": 0.4062069272153306,
|
| 32126 |
+
"grad_norm": 0.07050742208957672,
|
| 32127 |
+
"learning_rate": 0.0009795047485513498,
|
| 32128 |
+
"loss": 1.5427,
|
| 32129 |
+
"step": 9136
|
| 32130 |
+
},
|
| 32131 |
+
{
|
| 32132 |
+
"epoch": 0.40629585167400295,
|
| 32133 |
+
"grad_norm": 0.0698935016989708,
|
| 32134 |
+
"learning_rate": 0.000979494740437297,
|
| 32135 |
+
"loss": 1.5354,
|
| 32136 |
+
"step": 9138
|
| 32137 |
+
},
|
| 32138 |
+
{
|
| 32139 |
+
"epoch": 0.4063847761326753,
|
| 32140 |
+
"grad_norm": 0.0688849613070488,
|
| 32141 |
+
"learning_rate": 0.0009794847299314448,
|
| 32142 |
+
"loss": 1.5354,
|
| 32143 |
+
"step": 9140
|
| 32144 |
+
},
|
| 32145 |
+
{
|
| 32146 |
+
"epoch": 0.40647370059134763,
|
| 32147 |
+
"grad_norm": 0.0699022114276886,
|
| 32148 |
+
"learning_rate": 0.0009794747170338435,
|
| 32149 |
+
"loss": 1.5383,
|
| 32150 |
+
"step": 9142
|
| 32151 |
+
},
|
| 32152 |
+
{
|
| 32153 |
+
"epoch": 0.40656262505002,
|
| 32154 |
+
"grad_norm": 0.06858205050230026,
|
| 32155 |
+
"learning_rate": 0.0009794647017445425,
|
| 32156 |
+
"loss": 1.5436,
|
| 32157 |
+
"step": 9144
|
| 32158 |
+
},
|
| 32159 |
+
{
|
| 32160 |
+
"epoch": 0.40665154950869237,
|
| 32161 |
+
"grad_norm": 0.06846922636032104,
|
| 32162 |
+
"learning_rate": 0.000979454684063592,
|
| 32163 |
+
"loss": 1.5442,
|
| 32164 |
+
"step": 9146
|
| 32165 |
+
},
|
| 32166 |
+
{
|
| 32167 |
+
"epoch": 0.40674047396736474,
|
| 32168 |
+
"grad_norm": 0.07031798362731934,
|
| 32169 |
+
"learning_rate": 0.000979444663991042,
|
| 32170 |
+
"loss": 1.5362,
|
| 32171 |
+
"step": 9148
|
| 32172 |
+
},
|
| 32173 |
+
{
|
| 32174 |
+
"epoch": 0.4068293984260371,
|
| 32175 |
+
"grad_norm": 0.07397054880857468,
|
| 32176 |
+
"learning_rate": 0.0009794346415269424,
|
| 32177 |
+
"loss": 1.5395,
|
| 32178 |
+
"step": 9150
|
| 32179 |
+
},
|
| 32180 |
+
{
|
| 32181 |
+
"epoch": 0.4069183228847094,
|
| 32182 |
+
"grad_norm": 0.0680195763707161,
|
| 32183 |
+
"learning_rate": 0.0009794246166713433,
|
| 32184 |
+
"loss": 1.5375,
|
| 32185 |
+
"step": 9152
|
| 32186 |
+
},
|
| 32187 |
+
{
|
| 32188 |
+
"epoch": 0.4070072473433818,
|
| 32189 |
+
"grad_norm": 0.06949716061353683,
|
| 32190 |
+
"learning_rate": 0.0009794145894242946,
|
| 32191 |
+
"loss": 1.5439,
|
| 32192 |
+
"step": 9154
|
| 32193 |
+
},
|
| 32194 |
+
{
|
| 32195 |
+
"epoch": 0.40709617180205415,
|
| 32196 |
+
"grad_norm": 0.07266386598348618,
|
| 32197 |
+
"learning_rate": 0.0009794045597858463,
|
| 32198 |
+
"loss": 1.5396,
|
| 32199 |
+
"step": 9156
|
| 32200 |
+
},
|
| 32201 |
+
{
|
| 32202 |
+
"epoch": 0.4071850962607265,
|
| 32203 |
+
"grad_norm": 0.07391185313463211,
|
| 32204 |
+
"learning_rate": 0.0009793945277560485,
|
| 32205 |
+
"loss": 1.5412,
|
| 32206 |
+
"step": 9158
|
| 32207 |
+
},
|
| 32208 |
+
{
|
| 32209 |
+
"epoch": 0.4072740207193989,
|
| 32210 |
+
"grad_norm": 0.07154107093811035,
|
| 32211 |
+
"learning_rate": 0.0009793844933349513,
|
| 32212 |
+
"loss": 1.5426,
|
| 32213 |
+
"step": 9160
|
| 32214 |
+
},
|
| 32215 |
+
{
|
| 32216 |
+
"epoch": 0.4073629451780712,
|
| 32217 |
+
"grad_norm": 0.06954929977655411,
|
| 32218 |
+
"learning_rate": 0.0009793744565226045,
|
| 32219 |
+
"loss": 1.5383,
|
| 32220 |
+
"step": 9162
|
| 32221 |
+
},
|
| 32222 |
+
{
|
| 32223 |
+
"epoch": 0.40745186963674357,
|
| 32224 |
+
"grad_norm": 0.06934426724910736,
|
| 32225 |
+
"learning_rate": 0.0009793644173190584,
|
| 32226 |
+
"loss": 1.5397,
|
| 32227 |
+
"step": 9164
|
| 32228 |
+
},
|
| 32229 |
+
{
|
| 32230 |
+
"epoch": 0.40754079409541594,
|
| 32231 |
+
"grad_norm": 0.07320673018693924,
|
| 32232 |
+
"learning_rate": 0.000979354375724363,
|
| 32233 |
+
"loss": 1.5324,
|
| 32234 |
+
"step": 9166
|
| 32235 |
+
},
|
| 32236 |
+
{
|
| 32237 |
+
"epoch": 0.4076297185540883,
|
| 32238 |
+
"grad_norm": 0.07140939682722092,
|
| 32239 |
+
"learning_rate": 0.0009793443317385685,
|
| 32240 |
+
"loss": 1.5336,
|
| 32241 |
+
"step": 9168
|
| 32242 |
+
},
|
| 32243 |
+
{
|
| 32244 |
+
"epoch": 0.4077186430127607,
|
| 32245 |
+
"grad_norm": 0.07228023558855057,
|
| 32246 |
+
"learning_rate": 0.0009793342853617248,
|
| 32247 |
+
"loss": 1.5398,
|
| 32248 |
+
"step": 9170
|
| 32249 |
+
},
|
| 32250 |
+
{
|
| 32251 |
+
"epoch": 0.40780756747143304,
|
| 32252 |
+
"grad_norm": 0.07187279313802719,
|
| 32253 |
+
"learning_rate": 0.0009793242365938822,
|
| 32254 |
+
"loss": 1.5452,
|
| 32255 |
+
"step": 9172
|
| 32256 |
+
},
|
| 32257 |
+
{
|
| 32258 |
+
"epoch": 0.40789649193010535,
|
| 32259 |
+
"grad_norm": 0.06871677190065384,
|
| 32260 |
+
"learning_rate": 0.000979314185435091,
|
| 32261 |
+
"loss": 1.5341,
|
| 32262 |
+
"step": 9174
|
| 32263 |
+
},
|
| 32264 |
+
{
|
| 32265 |
+
"epoch": 0.4079854163887777,
|
| 32266 |
+
"grad_norm": 0.06969435513019562,
|
| 32267 |
+
"learning_rate": 0.0009793041318854007,
|
| 32268 |
+
"loss": 1.5386,
|
| 32269 |
+
"step": 9176
|
| 32270 |
+
},
|
| 32271 |
+
{
|
| 32272 |
+
"epoch": 0.4080743408474501,
|
| 32273 |
+
"grad_norm": 0.07604380697011948,
|
| 32274 |
+
"learning_rate": 0.0009792940759448619,
|
| 32275 |
+
"loss": 1.5462,
|
| 32276 |
+
"step": 9178
|
| 32277 |
+
},
|
| 32278 |
+
{
|
| 32279 |
+
"epoch": 0.40816326530612246,
|
| 32280 |
+
"grad_norm": 0.06866500526666641,
|
| 32281 |
+
"learning_rate": 0.0009792840176135248,
|
| 32282 |
+
"loss": 1.5392,
|
| 32283 |
+
"step": 9180
|
| 32284 |
+
},
|
| 32285 |
+
{
|
| 32286 |
+
"epoch": 0.4082521897647948,
|
| 32287 |
+
"grad_norm": 0.06986360996961594,
|
| 32288 |
+
"learning_rate": 0.0009792739568914393,
|
| 32289 |
+
"loss": 1.5381,
|
| 32290 |
+
"step": 9182
|
| 32291 |
+
},
|
| 32292 |
+
{
|
| 32293 |
+
"epoch": 0.40834111422346714,
|
| 32294 |
+
"grad_norm": 0.06875632703304291,
|
| 32295 |
+
"learning_rate": 0.0009792638937786559,
|
| 32296 |
+
"loss": 1.5407,
|
| 32297 |
+
"step": 9184
|
| 32298 |
+
},
|
| 32299 |
+
{
|
| 32300 |
+
"epoch": 0.4084300386821395,
|
| 32301 |
+
"grad_norm": 0.06942211091518402,
|
| 32302 |
+
"learning_rate": 0.0009792538282752245,
|
| 32303 |
+
"loss": 1.5391,
|
| 32304 |
+
"step": 9186
|
| 32305 |
+
},
|
| 32306 |
+
{
|
| 32307 |
+
"epoch": 0.4085189631408119,
|
| 32308 |
+
"grad_norm": 0.06980740278959274,
|
| 32309 |
+
"learning_rate": 0.0009792437603811954,
|
| 32310 |
+
"loss": 1.5366,
|
| 32311 |
+
"step": 9188
|
| 32312 |
+
},
|
| 32313 |
+
{
|
| 32314 |
+
"epoch": 0.40860788759948424,
|
| 32315 |
+
"grad_norm": 0.0708228349685669,
|
| 32316 |
+
"learning_rate": 0.000979233690096619,
|
| 32317 |
+
"loss": 1.5413,
|
| 32318 |
+
"step": 9190
|
| 32319 |
+
},
|
| 32320 |
+
{
|
| 32321 |
+
"epoch": 0.4086968120581566,
|
| 32322 |
+
"grad_norm": 0.07046905905008316,
|
| 32323 |
+
"learning_rate": 0.0009792236174215455,
|
| 32324 |
+
"loss": 1.5366,
|
| 32325 |
+
"step": 9192
|
| 32326 |
+
},
|
| 32327 |
+
{
|
| 32328 |
+
"epoch": 0.408785736516829,
|
| 32329 |
+
"grad_norm": 0.0711686909198761,
|
| 32330 |
+
"learning_rate": 0.000979213542356025,
|
| 32331 |
+
"loss": 1.5439,
|
| 32332 |
+
"step": 9194
|
| 32333 |
+
},
|
| 32334 |
+
{
|
| 32335 |
+
"epoch": 0.4088746609755013,
|
| 32336 |
+
"grad_norm": 0.06964288651943207,
|
| 32337 |
+
"learning_rate": 0.0009792034649001079,
|
| 32338 |
+
"loss": 1.5353,
|
| 32339 |
+
"step": 9196
|
| 32340 |
+
},
|
| 32341 |
+
{
|
| 32342 |
+
"epoch": 0.40896358543417366,
|
| 32343 |
+
"grad_norm": 0.07251102477312088,
|
| 32344 |
+
"learning_rate": 0.0009791933850538442,
|
| 32345 |
+
"loss": 1.5396,
|
| 32346 |
+
"step": 9198
|
| 32347 |
+
},
|
| 32348 |
+
{
|
| 32349 |
+
"epoch": 0.40905250989284603,
|
| 32350 |
+
"grad_norm": 0.07063327729701996,
|
| 32351 |
+
"learning_rate": 0.0009791833028172843,
|
| 32352 |
+
"loss": 1.5388,
|
| 32353 |
+
"step": 9200
|
| 32354 |
+
},
|
| 32355 |
+
{
|
| 32356 |
+
"epoch": 0.4091414343515184,
|
| 32357 |
+
"grad_norm": 0.07538381218910217,
|
| 32358 |
+
"learning_rate": 0.0009791732181904788,
|
| 32359 |
+
"loss": 1.5417,
|
| 32360 |
+
"step": 9202
|
| 32361 |
+
},
|
| 32362 |
+
{
|
| 32363 |
+
"epoch": 0.40923035881019076,
|
| 32364 |
+
"grad_norm": 0.07273292541503906,
|
| 32365 |
+
"learning_rate": 0.0009791631311734774,
|
| 32366 |
+
"loss": 1.5364,
|
| 32367 |
+
"step": 9204
|
| 32368 |
+
},
|
| 32369 |
+
{
|
| 32370 |
+
"epoch": 0.4093192832688631,
|
| 32371 |
+
"grad_norm": 0.07258638739585876,
|
| 32372 |
+
"learning_rate": 0.0009791530417663308,
|
| 32373 |
+
"loss": 1.5409,
|
| 32374 |
+
"step": 9206
|
| 32375 |
+
},
|
| 32376 |
+
{
|
| 32377 |
+
"epoch": 0.40940820772753544,
|
| 32378 |
+
"grad_norm": 0.07772589474916458,
|
| 32379 |
+
"learning_rate": 0.0009791429499690896,
|
| 32380 |
+
"loss": 1.5398,
|
| 32381 |
+
"step": 9208
|
| 32382 |
+
},
|
| 32383 |
+
{
|
| 32384 |
+
"epoch": 0.4094971321862078,
|
| 32385 |
+
"grad_norm": 0.07376066595315933,
|
| 32386 |
+
"learning_rate": 0.0009791328557818035,
|
| 32387 |
+
"loss": 1.5427,
|
| 32388 |
+
"step": 9210
|
| 32389 |
+
},
|
| 32390 |
+
{
|
| 32391 |
+
"epoch": 0.4095860566448802,
|
| 32392 |
+
"grad_norm": 0.07310586422681808,
|
| 32393 |
+
"learning_rate": 0.0009791227592045235,
|
| 32394 |
+
"loss": 1.5431,
|
| 32395 |
+
"step": 9212
|
| 32396 |
+
},
|
| 32397 |
+
{
|
| 32398 |
+
"epoch": 0.40967498110355255,
|
| 32399 |
+
"grad_norm": 0.07371335476636887,
|
| 32400 |
+
"learning_rate": 0.0009791126602372996,
|
| 32401 |
+
"loss": 1.5384,
|
| 32402 |
+
"step": 9214
|
| 32403 |
+
},
|
| 32404 |
+
{
|
| 32405 |
+
"epoch": 0.4097639055622249,
|
| 32406 |
+
"grad_norm": 0.0725250244140625,
|
| 32407 |
+
"learning_rate": 0.000979102558880182,
|
| 32408 |
+
"loss": 1.5362,
|
| 32409 |
+
"step": 9216
|
| 32410 |
+
},
|
| 32411 |
+
{
|
| 32412 |
+
"epoch": 0.40985283002089723,
|
| 32413 |
+
"grad_norm": 0.07021638751029968,
|
| 32414 |
+
"learning_rate": 0.0009790924551332215,
|
| 32415 |
+
"loss": 1.5344,
|
| 32416 |
+
"step": 9218
|
| 32417 |
+
},
|
| 32418 |
+
{
|
| 32419 |
+
"epoch": 0.4099417544795696,
|
| 32420 |
+
"grad_norm": 0.06901945173740387,
|
| 32421 |
+
"learning_rate": 0.0009790823489964683,
|
| 32422 |
+
"loss": 1.5343,
|
| 32423 |
+
"step": 9220
|
| 32424 |
+
},
|
| 32425 |
+
{
|
| 32426 |
+
"epoch": 0.41003067893824197,
|
| 32427 |
+
"grad_norm": 0.06901168823242188,
|
| 32428 |
+
"learning_rate": 0.0009790722404699726,
|
| 32429 |
+
"loss": 1.5369,
|
| 32430 |
+
"step": 9222
|
| 32431 |
+
},
|
| 32432 |
+
{
|
| 32433 |
+
"epoch": 0.41011960339691433,
|
| 32434 |
+
"grad_norm": 0.06912058591842651,
|
| 32435 |
+
"learning_rate": 0.0009790621295537852,
|
| 32436 |
+
"loss": 1.5356,
|
| 32437 |
+
"step": 9224
|
| 32438 |
+
},
|
| 32439 |
+
{
|
| 32440 |
+
"epoch": 0.4102085278555867,
|
| 32441 |
+
"grad_norm": 0.07027976959943771,
|
| 32442 |
+
"learning_rate": 0.0009790520162479563,
|
| 32443 |
+
"loss": 1.5343,
|
| 32444 |
+
"step": 9226
|
| 32445 |
+
},
|
| 32446 |
+
{
|
| 32447 |
+
"epoch": 0.410297452314259,
|
| 32448 |
+
"grad_norm": 0.07126928120851517,
|
| 32449 |
+
"learning_rate": 0.0009790419005525366,
|
| 32450 |
+
"loss": 1.5394,
|
| 32451 |
+
"step": 9228
|
| 32452 |
+
},
|
| 32453 |
+
{
|
| 32454 |
+
"epoch": 0.4103863767729314,
|
| 32455 |
+
"grad_norm": 0.0708712786436081,
|
| 32456 |
+
"learning_rate": 0.0009790317824675762,
|
| 32457 |
+
"loss": 1.5438,
|
| 32458 |
+
"step": 9230
|
| 32459 |
+
},
|
| 32460 |
+
{
|
| 32461 |
+
"epoch": 0.41047530123160375,
|
| 32462 |
+
"grad_norm": 0.07011223584413528,
|
| 32463 |
+
"learning_rate": 0.000979021661993126,
|
| 32464 |
+
"loss": 1.5359,
|
| 32465 |
+
"step": 9232
|
| 32466 |
+
},
|
| 32467 |
+
{
|
| 32468 |
+
"epoch": 0.4105642256902761,
|
| 32469 |
+
"grad_norm": 0.07202787697315216,
|
| 32470 |
+
"learning_rate": 0.000979011539129236,
|
| 32471 |
+
"loss": 1.5422,
|
| 32472 |
+
"step": 9234
|
| 32473 |
+
},
|
| 32474 |
+
{
|
| 32475 |
+
"epoch": 0.4106531501489485,
|
| 32476 |
+
"grad_norm": 0.06934972107410431,
|
| 32477 |
+
"learning_rate": 0.0009790014138759571,
|
| 32478 |
+
"loss": 1.5399,
|
| 32479 |
+
"step": 9236
|
| 32480 |
+
},
|
| 32481 |
+
{
|
| 32482 |
+
"epoch": 0.4107420746076208,
|
| 32483 |
+
"grad_norm": 0.07186301797628403,
|
| 32484 |
+
"learning_rate": 0.0009789912862333394,
|
| 32485 |
+
"loss": 1.5426,
|
| 32486 |
+
"step": 9238
|
| 32487 |
+
},
|
| 32488 |
+
{
|
| 32489 |
+
"epoch": 0.41083099906629317,
|
| 32490 |
+
"grad_norm": 0.07302137464284897,
|
| 32491 |
+
"learning_rate": 0.0009789811562014338,
|
| 32492 |
+
"loss": 1.5408,
|
| 32493 |
+
"step": 9240
|
| 32494 |
+
},
|
| 32495 |
+
{
|
| 32496 |
+
"epoch": 0.41091992352496554,
|
| 32497 |
+
"grad_norm": 0.07044612616300583,
|
| 32498 |
+
"learning_rate": 0.0009789710237802908,
|
| 32499 |
+
"loss": 1.545,
|
| 32500 |
+
"step": 9242
|
| 32501 |
+
},
|
| 32502 |
+
{
|
| 32503 |
+
"epoch": 0.4110088479836379,
|
| 32504 |
+
"grad_norm": 0.07422881573438644,
|
| 32505 |
+
"learning_rate": 0.0009789608889699607,
|
| 32506 |
+
"loss": 1.5366,
|
| 32507 |
+
"step": 9244
|
| 32508 |
+
},
|
| 32509 |
+
{
|
| 32510 |
+
"epoch": 0.41109777244231027,
|
| 32511 |
+
"grad_norm": 0.07070881128311157,
|
| 32512 |
+
"learning_rate": 0.0009789507517704943,
|
| 32513 |
+
"loss": 1.5344,
|
| 32514 |
+
"step": 9246
|
| 32515 |
+
},
|
| 32516 |
+
{
|
| 32517 |
+
"epoch": 0.41118669690098264,
|
| 32518 |
+
"grad_norm": 0.07254374772310257,
|
| 32519 |
+
"learning_rate": 0.0009789406121819418,
|
| 32520 |
+
"loss": 1.5384,
|
| 32521 |
+
"step": 9248
|
| 32522 |
+
},
|
| 32523 |
+
{
|
| 32524 |
+
"epoch": 0.41127562135965495,
|
| 32525 |
+
"grad_norm": 0.07279791682958603,
|
| 32526 |
+
"learning_rate": 0.0009789304702043542,
|
| 32527 |
+
"loss": 1.5434,
|
| 32528 |
+
"step": 9250
|
| 32529 |
+
},
|
| 32530 |
+
{
|
| 32531 |
+
"epoch": 0.4113645458183273,
|
| 32532 |
+
"grad_norm": 0.07429590076208115,
|
| 32533 |
+
"learning_rate": 0.000978920325837782,
|
| 32534 |
+
"loss": 1.5449,
|
| 32535 |
+
"step": 9252
|
| 32536 |
+
},
|
| 32537 |
+
{
|
| 32538 |
+
"epoch": 0.4114534702769997,
|
| 32539 |
+
"grad_norm": 0.07568711042404175,
|
| 32540 |
+
"learning_rate": 0.0009789101790822756,
|
| 32541 |
+
"loss": 1.541,
|
| 32542 |
+
"step": 9254
|
| 32543 |
+
},
|
| 32544 |
+
{
|
| 32545 |
+
"epoch": 0.41154239473567206,
|
| 32546 |
+
"grad_norm": 0.07262945920228958,
|
| 32547 |
+
"learning_rate": 0.0009789000299378857,
|
| 32548 |
+
"loss": 1.5372,
|
| 32549 |
+
"step": 9256
|
| 32550 |
+
},
|
| 32551 |
+
{
|
| 32552 |
+
"epoch": 0.4116313191943444,
|
| 32553 |
+
"grad_norm": 0.07350386679172516,
|
| 32554 |
+
"learning_rate": 0.000978889878404663,
|
| 32555 |
+
"loss": 1.5409,
|
| 32556 |
+
"step": 9258
|
| 32557 |
+
},
|
| 32558 |
+
{
|
| 32559 |
+
"epoch": 0.41172024365301674,
|
| 32560 |
+
"grad_norm": 0.0701087936758995,
|
| 32561 |
+
"learning_rate": 0.000978879724482658,
|
| 32562 |
+
"loss": 1.5446,
|
| 32563 |
+
"step": 9260
|
| 32564 |
+
},
|
| 32565 |
+
{
|
| 32566 |
+
"epoch": 0.4118091681116891,
|
| 32567 |
+
"grad_norm": 0.06851620972156525,
|
| 32568 |
+
"learning_rate": 0.0009788695681719217,
|
| 32569 |
+
"loss": 1.5366,
|
| 32570 |
+
"step": 9262
|
| 32571 |
+
},
|
| 32572 |
+
{
|
| 32573 |
+
"epoch": 0.4118980925703615,
|
| 32574 |
+
"grad_norm": 0.06817908585071564,
|
| 32575 |
+
"learning_rate": 0.0009788594094725042,
|
| 32576 |
+
"loss": 1.5356,
|
| 32577 |
+
"step": 9264
|
| 32578 |
+
},
|
| 32579 |
+
{
|
| 32580 |
+
"epoch": 0.41198701702903384,
|
| 32581 |
+
"grad_norm": 0.06984448432922363,
|
| 32582 |
+
"learning_rate": 0.0009788492483844566,
|
| 32583 |
+
"loss": 1.5404,
|
| 32584 |
+
"step": 9266
|
| 32585 |
+
},
|
| 32586 |
+
{
|
| 32587 |
+
"epoch": 0.4120759414877062,
|
| 32588 |
+
"grad_norm": 0.07095382362604141,
|
| 32589 |
+
"learning_rate": 0.0009788390849078295,
|
| 32590 |
+
"loss": 1.5423,
|
| 32591 |
+
"step": 9268
|
| 32592 |
+
},
|
| 32593 |
+
{
|
| 32594 |
+
"epoch": 0.4121648659463786,
|
| 32595 |
+
"grad_norm": 0.06866953521966934,
|
| 32596 |
+
"learning_rate": 0.0009788289190426735,
|
| 32597 |
+
"loss": 1.5337,
|
| 32598 |
+
"step": 9270
|
| 32599 |
+
},
|
| 32600 |
+
{
|
| 32601 |
+
"epoch": 0.4122537904050509,
|
| 32602 |
+
"grad_norm": 0.06914438307285309,
|
| 32603 |
+
"learning_rate": 0.0009788187507890395,
|
| 32604 |
+
"loss": 1.538,
|
| 32605 |
+
"step": 9272
|
| 32606 |
+
},
|
| 32607 |
+
{
|
| 32608 |
+
"epoch": 0.41234271486372326,
|
| 32609 |
+
"grad_norm": 0.06855052709579468,
|
| 32610 |
+
"learning_rate": 0.0009788085801469778,
|
| 32611 |
+
"loss": 1.5351,
|
| 32612 |
+
"step": 9274
|
| 32613 |
+
},
|
| 32614 |
+
{
|
| 32615 |
+
"epoch": 0.4124316393223956,
|
| 32616 |
+
"grad_norm": 0.0681350976228714,
|
| 32617 |
+
"learning_rate": 0.0009787984071165398,
|
| 32618 |
+
"loss": 1.5395,
|
| 32619 |
+
"step": 9276
|
| 32620 |
+
},
|
| 32621 |
+
{
|
| 32622 |
+
"epoch": 0.412520563781068,
|
| 32623 |
+
"grad_norm": 0.07035652548074722,
|
| 32624 |
+
"learning_rate": 0.0009787882316977757,
|
| 32625 |
+
"loss": 1.5417,
|
| 32626 |
+
"step": 9278
|
| 32627 |
+
},
|
| 32628 |
+
{
|
| 32629 |
+
"epoch": 0.41260948823974036,
|
| 32630 |
+
"grad_norm": 0.06886482238769531,
|
| 32631 |
+
"learning_rate": 0.0009787780538907365,
|
| 32632 |
+
"loss": 1.537,
|
| 32633 |
+
"step": 9280
|
| 32634 |
+
},
|
| 32635 |
+
{
|
| 32636 |
+
"epoch": 0.4126984126984127,
|
| 32637 |
+
"grad_norm": 0.07588198781013489,
|
| 32638 |
+
"learning_rate": 0.000978767873695473,
|
| 32639 |
+
"loss": 1.5403,
|
| 32640 |
+
"step": 9282
|
| 32641 |
+
},
|
| 32642 |
+
{
|
| 32643 |
+
"epoch": 0.41278733715708504,
|
| 32644 |
+
"grad_norm": 0.069828562438488,
|
| 32645 |
+
"learning_rate": 0.0009787576911120355,
|
| 32646 |
+
"loss": 1.544,
|
| 32647 |
+
"step": 9284
|
| 32648 |
+
},
|
| 32649 |
+
{
|
| 32650 |
+
"epoch": 0.4128762616157574,
|
| 32651 |
+
"grad_norm": 0.07009178400039673,
|
| 32652 |
+
"learning_rate": 0.0009787475061404753,
|
| 32653 |
+
"loss": 1.5365,
|
| 32654 |
+
"step": 9286
|
| 32655 |
+
},
|
| 32656 |
+
{
|
| 32657 |
+
"epoch": 0.4129651860744298,
|
| 32658 |
+
"grad_norm": 0.06862553209066391,
|
| 32659 |
+
"learning_rate": 0.0009787373187808432,
|
| 32660 |
+
"loss": 1.538,
|
| 32661 |
+
"step": 9288
|
| 32662 |
+
},
|
| 32663 |
+
{
|
| 32664 |
+
"epoch": 0.41305411053310215,
|
| 32665 |
+
"grad_norm": 0.07022769749164581,
|
| 32666 |
+
"learning_rate": 0.00097872712903319,
|
| 32667 |
+
"loss": 1.543,
|
| 32668 |
+
"step": 9290
|
| 32669 |
+
},
|
| 32670 |
+
{
|
| 32671 |
+
"epoch": 0.41314303499177446,
|
| 32672 |
+
"grad_norm": 0.07054140418767929,
|
| 32673 |
+
"learning_rate": 0.000978716936897566,
|
| 32674 |
+
"loss": 1.5383,
|
| 32675 |
+
"step": 9292
|
| 32676 |
+
},
|
| 32677 |
+
{
|
| 32678 |
+
"epoch": 0.41323195945044683,
|
| 32679 |
+
"grad_norm": 0.07116847485303879,
|
| 32680 |
+
"learning_rate": 0.0009787067423740227,
|
| 32681 |
+
"loss": 1.5386,
|
| 32682 |
+
"step": 9294
|
| 32683 |
+
},
|
| 32684 |
+
{
|
| 32685 |
+
"epoch": 0.4133208839091192,
|
| 32686 |
+
"grad_norm": 0.06990710645914078,
|
| 32687 |
+
"learning_rate": 0.0009786965454626107,
|
| 32688 |
+
"loss": 1.5343,
|
| 32689 |
+
"step": 9296
|
| 32690 |
+
},
|
| 32691 |
+
{
|
| 32692 |
+
"epoch": 0.41340980836779156,
|
| 32693 |
+
"grad_norm": 0.06872306019067764,
|
| 32694 |
+
"learning_rate": 0.0009786863461633808,
|
| 32695 |
+
"loss": 1.5424,
|
| 32696 |
+
"step": 9298
|
| 32697 |
+
},
|
| 32698 |
+
{
|
| 32699 |
+
"epoch": 0.41349873282646393,
|
| 32700 |
+
"grad_norm": 0.06872355192899704,
|
| 32701 |
+
"learning_rate": 0.0009786761444763841,
|
| 32702 |
+
"loss": 1.541,
|
| 32703 |
+
"step": 9300
|
| 32704 |
+
},
|
| 32705 |
+
{
|
| 32706 |
+
"epoch": 0.4135876572851363,
|
| 32707 |
+
"grad_norm": 0.07212715595960617,
|
| 32708 |
+
"learning_rate": 0.0009786659404016711,
|
| 32709 |
+
"loss": 1.5376,
|
| 32710 |
+
"step": 9302
|
| 32711 |
+
},
|
| 32712 |
+
{
|
| 32713 |
+
"epoch": 0.4136765817438086,
|
| 32714 |
+
"grad_norm": 0.06772853434085846,
|
| 32715 |
+
"learning_rate": 0.0009786557339392932,
|
| 32716 |
+
"loss": 1.5369,
|
| 32717 |
+
"step": 9304
|
| 32718 |
+
},
|
| 32719 |
+
{
|
| 32720 |
+
"epoch": 0.413765506202481,
|
| 32721 |
+
"grad_norm": 0.06942387670278549,
|
| 32722 |
+
"learning_rate": 0.000978645525089301,
|
| 32723 |
+
"loss": 1.5344,
|
| 32724 |
+
"step": 9306
|
| 32725 |
+
},
|
| 32726 |
+
{
|
| 32727 |
+
"epoch": 0.41385443066115335,
|
| 32728 |
+
"grad_norm": 0.07053601741790771,
|
| 32729 |
+
"learning_rate": 0.0009786353138517454,
|
| 32730 |
+
"loss": 1.5401,
|
| 32731 |
+
"step": 9308
|
| 32732 |
+
},
|
| 32733 |
+
{
|
| 32734 |
+
"epoch": 0.4139433551198257,
|
| 32735 |
+
"grad_norm": 0.07274100184440613,
|
| 32736 |
+
"learning_rate": 0.0009786251002266773,
|
| 32737 |
+
"loss": 1.5324,
|
| 32738 |
+
"step": 9310
|
| 32739 |
+
},
|
| 32740 |
+
{
|
| 32741 |
+
"epoch": 0.4140322795784981,
|
| 32742 |
+
"grad_norm": 0.07127835601568222,
|
| 32743 |
+
"learning_rate": 0.000978614884214148,
|
| 32744 |
+
"loss": 1.5427,
|
| 32745 |
+
"step": 9312
|
| 32746 |
+
},
|
| 32747 |
+
{
|
| 32748 |
+
"epoch": 0.4141212040371704,
|
| 32749 |
+
"grad_norm": 0.0735345184803009,
|
| 32750 |
+
"learning_rate": 0.0009786046658142081,
|
| 32751 |
+
"loss": 1.5359,
|
| 32752 |
+
"step": 9314
|
| 32753 |
+
},
|
| 32754 |
+
{
|
| 32755 |
+
"epoch": 0.41421012849584277,
|
| 32756 |
+
"grad_norm": 0.07226788252592087,
|
| 32757 |
+
"learning_rate": 0.0009785944450269087,
|
| 32758 |
+
"loss": 1.5428,
|
| 32759 |
+
"step": 9316
|
| 32760 |
+
},
|
| 32761 |
+
{
|
| 32762 |
+
"epoch": 0.41429905295451513,
|
| 32763 |
+
"grad_norm": 0.0691119134426117,
|
| 32764 |
+
"learning_rate": 0.0009785842218523006,
|
| 32765 |
+
"loss": 1.5388,
|
| 32766 |
+
"step": 9318
|
| 32767 |
+
},
|
| 32768 |
+
{
|
| 32769 |
+
"epoch": 0.4143879774131875,
|
| 32770 |
+
"grad_norm": 0.06924230605363846,
|
| 32771 |
+
"learning_rate": 0.000978573996290435,
|
| 32772 |
+
"loss": 1.5377,
|
| 32773 |
+
"step": 9320
|
| 32774 |
+
},
|
| 32775 |
+
{
|
| 32776 |
+
"epoch": 0.41447690187185987,
|
| 32777 |
+
"grad_norm": 0.07052425295114517,
|
| 32778 |
+
"learning_rate": 0.000978563768341363,
|
| 32779 |
+
"loss": 1.5378,
|
| 32780 |
+
"step": 9322
|
| 32781 |
+
},
|
| 32782 |
+
{
|
| 32783 |
+
"epoch": 0.41456582633053224,
|
| 32784 |
+
"grad_norm": 0.07418438792228699,
|
| 32785 |
+
"learning_rate": 0.0009785535380051355,
|
| 32786 |
+
"loss": 1.5331,
|
| 32787 |
+
"step": 9324
|
| 32788 |
+
},
|
| 32789 |
+
{
|
| 32790 |
+
"epoch": 0.41465475078920455,
|
| 32791 |
+
"grad_norm": 0.07372714579105377,
|
| 32792 |
+
"learning_rate": 0.0009785433052818034,
|
| 32793 |
+
"loss": 1.5363,
|
| 32794 |
+
"step": 9326
|
| 32795 |
+
},
|
| 32796 |
+
{
|
| 32797 |
+
"epoch": 0.4147436752478769,
|
| 32798 |
+
"grad_norm": 0.07146646827459335,
|
| 32799 |
+
"learning_rate": 0.000978533070171418,
|
| 32800 |
+
"loss": 1.5329,
|
| 32801 |
+
"step": 9328
|
| 32802 |
+
},
|
| 32803 |
+
{
|
| 32804 |
+
"epoch": 0.4148325997065493,
|
| 32805 |
+
"grad_norm": 0.07278136909008026,
|
| 32806 |
+
"learning_rate": 0.00097852283267403,
|
| 32807 |
+
"loss": 1.5412,
|
| 32808 |
+
"step": 9330
|
| 32809 |
+
},
|
| 32810 |
+
{
|
| 32811 |
+
"epoch": 0.41492152416522166,
|
| 32812 |
+
"grad_norm": 0.07172350585460663,
|
| 32813 |
+
"learning_rate": 0.0009785125927896908,
|
| 32814 |
+
"loss": 1.5373,
|
| 32815 |
+
"step": 9332
|
| 32816 |
+
},
|
| 32817 |
+
{
|
| 32818 |
+
"epoch": 0.415010448623894,
|
| 32819 |
+
"grad_norm": 0.07292774319648743,
|
| 32820 |
+
"learning_rate": 0.0009785023505184513,
|
| 32821 |
+
"loss": 1.5368,
|
| 32822 |
+
"step": 9334
|
| 32823 |
+
},
|
| 32824 |
+
{
|
| 32825 |
+
"epoch": 0.41509937308256634,
|
| 32826 |
+
"grad_norm": 0.07089949399232864,
|
| 32827 |
+
"learning_rate": 0.0009784921058603629,
|
| 32828 |
+
"loss": 1.5319,
|
| 32829 |
+
"step": 9336
|
| 32830 |
+
},
|
| 32831 |
+
{
|
| 32832 |
+
"epoch": 0.4151882975412387,
|
| 32833 |
+
"grad_norm": 0.0746246799826622,
|
| 32834 |
+
"learning_rate": 0.0009784818588154762,
|
| 32835 |
+
"loss": 1.541,
|
| 32836 |
+
"step": 9338
|
| 32837 |
+
},
|
| 32838 |
+
{
|
| 32839 |
+
"epoch": 0.4152772219999111,
|
| 32840 |
+
"grad_norm": 0.07343286275863647,
|
| 32841 |
+
"learning_rate": 0.0009784716093838425,
|
| 32842 |
+
"loss": 1.5374,
|
| 32843 |
+
"step": 9340
|
| 32844 |
+
},
|
| 32845 |
+
{
|
| 32846 |
+
"epoch": 0.41536614645858344,
|
| 32847 |
+
"grad_norm": 0.06831394135951996,
|
| 32848 |
+
"learning_rate": 0.0009784613575655131,
|
| 32849 |
+
"loss": 1.532,
|
| 32850 |
+
"step": 9342
|
| 32851 |
+
},
|
| 32852 |
+
{
|
| 32853 |
+
"epoch": 0.4154550709172558,
|
| 32854 |
+
"grad_norm": 0.07356590032577515,
|
| 32855 |
+
"learning_rate": 0.000978451103360539,
|
| 32856 |
+
"loss": 1.5349,
|
| 32857 |
+
"step": 9344
|
| 32858 |
+
},
|
| 32859 |
+
{
|
| 32860 |
+
"epoch": 0.4155439953759281,
|
| 32861 |
+
"grad_norm": 0.07031702995300293,
|
| 32862 |
+
"learning_rate": 0.0009784408467689717,
|
| 32863 |
+
"loss": 1.5351,
|
| 32864 |
+
"step": 9346
|
| 32865 |
+
},
|
| 32866 |
+
{
|
| 32867 |
+
"epoch": 0.4156329198346005,
|
| 32868 |
+
"grad_norm": 0.07025104761123657,
|
| 32869 |
+
"learning_rate": 0.0009784305877908615,
|
| 32870 |
+
"loss": 1.5372,
|
| 32871 |
+
"step": 9348
|
| 32872 |
+
},
|
| 32873 |
+
{
|
| 32874 |
+
"epoch": 0.41572184429327286,
|
| 32875 |
+
"grad_norm": 0.06900499761104584,
|
| 32876 |
+
"learning_rate": 0.0009784203264262604,
|
| 32877 |
+
"loss": 1.5366,
|
| 32878 |
+
"step": 9350
|
| 32879 |
+
},
|
| 32880 |
+
{
|
| 32881 |
+
"epoch": 0.4158107687519452,
|
| 32882 |
+
"grad_norm": 0.06960804015398026,
|
| 32883 |
+
"learning_rate": 0.0009784100626752193,
|
| 32884 |
+
"loss": 1.5422,
|
| 32885 |
+
"step": 9352
|
| 32886 |
+
},
|
| 32887 |
+
{
|
| 32888 |
+
"epoch": 0.4158996932106176,
|
| 32889 |
+
"grad_norm": 0.07161404937505722,
|
| 32890 |
+
"learning_rate": 0.0009783997965377893,
|
| 32891 |
+
"loss": 1.5398,
|
| 32892 |
+
"step": 9354
|
| 32893 |
+
},
|
| 32894 |
+
{
|
| 32895 |
+
"epoch": 0.41598861766928996,
|
| 32896 |
+
"grad_norm": 0.07421243190765381,
|
| 32897 |
+
"learning_rate": 0.000978389528014022,
|
| 32898 |
+
"loss": 1.5351,
|
| 32899 |
+
"step": 9356
|
| 32900 |
+
},
|
| 32901 |
+
{
|
| 32902 |
+
"epoch": 0.4160775421279623,
|
| 32903 |
+
"grad_norm": 0.07687795907258987,
|
| 32904 |
+
"learning_rate": 0.000978379257103968,
|
| 32905 |
+
"loss": 1.5411,
|
| 32906 |
+
"step": 9358
|
| 32907 |
+
},
|
| 32908 |
+
{
|
| 32909 |
+
"epoch": 0.41616646658663464,
|
| 32910 |
+
"grad_norm": 0.07376468926668167,
|
| 32911 |
+
"learning_rate": 0.0009783689838076788,
|
| 32912 |
+
"loss": 1.5342,
|
| 32913 |
+
"step": 9360
|
| 32914 |
+
},
|
| 32915 |
+
{
|
| 32916 |
+
"epoch": 0.416255391045307,
|
| 32917 |
+
"grad_norm": 0.07067352533340454,
|
| 32918 |
+
"learning_rate": 0.000978358708125206,
|
| 32919 |
+
"loss": 1.5381,
|
| 32920 |
+
"step": 9362
|
| 32921 |
+
},
|
| 32922 |
+
{
|
| 32923 |
+
"epoch": 0.4163443155039794,
|
| 32924 |
+
"grad_norm": 0.06940723210573196,
|
| 32925 |
+
"learning_rate": 0.0009783484300566003,
|
| 32926 |
+
"loss": 1.541,
|
| 32927 |
+
"step": 9364
|
| 32928 |
+
},
|
| 32929 |
+
{
|
| 32930 |
+
"epoch": 0.41643323996265175,
|
| 32931 |
+
"grad_norm": 0.06866216659545898,
|
| 32932 |
+
"learning_rate": 0.0009783381496019136,
|
| 32933 |
+
"loss": 1.5412,
|
| 32934 |
+
"step": 9366
|
| 32935 |
+
},
|
| 32936 |
+
{
|
| 32937 |
+
"epoch": 0.41652216442132406,
|
| 32938 |
+
"grad_norm": 0.06930825859308243,
|
| 32939 |
+
"learning_rate": 0.0009783278667611964,
|
| 32940 |
+
"loss": 1.5347,
|
| 32941 |
+
"step": 9368
|
| 32942 |
+
},
|
| 32943 |
+
{
|
| 32944 |
+
"epoch": 0.4166110888799964,
|
| 32945 |
+
"grad_norm": 0.06734399497509003,
|
| 32946 |
+
"learning_rate": 0.0009783175815345005,
|
| 32947 |
+
"loss": 1.537,
|
| 32948 |
+
"step": 9370
|
| 32949 |
+
},
|
| 32950 |
+
{
|
| 32951 |
+
"epoch": 0.4167000133386688,
|
| 32952 |
+
"grad_norm": 0.06903259456157684,
|
| 32953 |
+
"learning_rate": 0.0009783072939218772,
|
| 32954 |
+
"loss": 1.5386,
|
| 32955 |
+
"step": 9372
|
| 32956 |
+
},
|
| 32957 |
+
{
|
| 32958 |
+
"epoch": 0.41678893779734116,
|
| 32959 |
+
"grad_norm": 0.0670986995100975,
|
| 32960 |
+
"learning_rate": 0.0009782970039233776,
|
| 32961 |
+
"loss": 1.5329,
|
| 32962 |
+
"step": 9374
|
| 32963 |
+
},
|
| 32964 |
+
{
|
| 32965 |
+
"epoch": 0.41687786225601353,
|
| 32966 |
+
"grad_norm": 0.07043662667274475,
|
| 32967 |
+
"learning_rate": 0.0009782867115390532,
|
| 32968 |
+
"loss": 1.5364,
|
| 32969 |
+
"step": 9376
|
| 32970 |
+
},
|
| 32971 |
+
{
|
| 32972 |
+
"epoch": 0.4169667867146859,
|
| 32973 |
+
"grad_norm": 0.06715195626020432,
|
| 32974 |
+
"learning_rate": 0.0009782764167689554,
|
| 32975 |
+
"loss": 1.5325,
|
| 32976 |
+
"step": 9378
|
| 32977 |
+
},
|
| 32978 |
+
{
|
| 32979 |
+
"epoch": 0.4170557111733582,
|
| 32980 |
+
"grad_norm": 0.06987708806991577,
|
| 32981 |
+
"learning_rate": 0.0009782661196131354,
|
| 32982 |
+
"loss": 1.5398,
|
| 32983 |
+
"step": 9380
|
| 32984 |
+
},
|
| 32985 |
+
{
|
| 32986 |
+
"epoch": 0.4171446356320306,
|
| 32987 |
+
"grad_norm": 0.06837180256843567,
|
| 32988 |
+
"learning_rate": 0.0009782558200716446,
|
| 32989 |
+
"loss": 1.5339,
|
| 32990 |
+
"step": 9382
|
| 32991 |
+
},
|
| 32992 |
+
{
|
| 32993 |
+
"epoch": 0.41723356009070295,
|
| 32994 |
+
"grad_norm": 0.0699625238776207,
|
| 32995 |
+
"learning_rate": 0.0009782455181445344,
|
| 32996 |
+
"loss": 1.5387,
|
| 32997 |
+
"step": 9384
|
| 32998 |
+
},
|
| 32999 |
+
{
|
| 33000 |
+
"epoch": 0.4173224845493753,
|
| 33001 |
+
"grad_norm": 0.07226444780826569,
|
| 33002 |
+
"learning_rate": 0.0009782352138318561,
|
| 33003 |
+
"loss": 1.5419,
|
| 33004 |
+
"step": 9386
|
| 33005 |
+
},
|
| 33006 |
+
{
|
| 33007 |
+
"epoch": 0.4174114090080477,
|
| 33008 |
+
"grad_norm": 0.06939677894115448,
|
| 33009 |
+
"learning_rate": 0.0009782249071336611,
|
| 33010 |
+
"loss": 1.5394,
|
| 33011 |
+
"step": 9388
|
| 33012 |
+
},
|
| 33013 |
+
{
|
| 33014 |
+
"epoch": 0.41750033346672,
|
| 33015 |
+
"grad_norm": 0.06957529485225677,
|
| 33016 |
+
"learning_rate": 0.000978214598050001,
|
| 33017 |
+
"loss": 1.5342,
|
| 33018 |
+
"step": 9390
|
| 33019 |
+
},
|
| 33020 |
+
{
|
| 33021 |
+
"epoch": 0.41758925792539237,
|
| 33022 |
+
"grad_norm": 0.06821033358573914,
|
| 33023 |
+
"learning_rate": 0.000978204286580927,
|
| 33024 |
+
"loss": 1.535,
|
| 33025 |
+
"step": 9392
|
| 33026 |
+
},
|
| 33027 |
+
{
|
| 33028 |
+
"epoch": 0.41767818238406473,
|
| 33029 |
+
"grad_norm": 0.06692170351743698,
|
| 33030 |
+
"learning_rate": 0.000978193972726491,
|
| 33031 |
+
"loss": 1.5296,
|
| 33032 |
+
"step": 9394
|
| 33033 |
+
},
|
| 33034 |
+
{
|
| 33035 |
+
"epoch": 0.4177671068427371,
|
| 33036 |
+
"grad_norm": 0.06882653385400772,
|
| 33037 |
+
"learning_rate": 0.0009781836564867437,
|
| 33038 |
+
"loss": 1.5383,
|
| 33039 |
+
"step": 9396
|
| 33040 |
+
},
|
| 33041 |
+
{
|
| 33042 |
+
"epoch": 0.41785603130140947,
|
| 33043 |
+
"grad_norm": 0.07427268475294113,
|
| 33044 |
+
"learning_rate": 0.000978173337861737,
|
| 33045 |
+
"loss": 1.5384,
|
| 33046 |
+
"step": 9398
|
| 33047 |
+
},
|
| 33048 |
+
{
|
| 33049 |
+
"epoch": 0.41794495576008184,
|
| 33050 |
+
"grad_norm": 0.07215935736894608,
|
| 33051 |
+
"learning_rate": 0.0009781630168515223,
|
| 33052 |
+
"loss": 1.5401,
|
| 33053 |
+
"step": 9400
|
| 33054 |
+
},
|
| 33055 |
+
{
|
| 33056 |
+
"epoch": 0.41803388021875415,
|
| 33057 |
+
"grad_norm": 0.07148215919733047,
|
| 33058 |
+
"learning_rate": 0.0009781526934561514,
|
| 33059 |
+
"loss": 1.5448,
|
| 33060 |
+
"step": 9402
|
| 33061 |
+
},
|
| 33062 |
+
{
|
| 33063 |
+
"epoch": 0.4181228046774265,
|
| 33064 |
+
"grad_norm": 0.06973208487033844,
|
| 33065 |
+
"learning_rate": 0.0009781423676756753,
|
| 33066 |
+
"loss": 1.5379,
|
| 33067 |
+
"step": 9404
|
| 33068 |
+
},
|
| 33069 |
+
{
|
| 33070 |
+
"epoch": 0.4182117291360989,
|
| 33071 |
+
"grad_norm": 0.07168926298618317,
|
| 33072 |
+
"learning_rate": 0.0009781320395101456,
|
| 33073 |
+
"loss": 1.5365,
|
| 33074 |
+
"step": 9406
|
| 33075 |
+
},
|
| 33076 |
+
{
|
| 33077 |
+
"epoch": 0.41830065359477125,
|
| 33078 |
+
"grad_norm": 0.0705469623208046,
|
| 33079 |
+
"learning_rate": 0.0009781217089596139,
|
| 33080 |
+
"loss": 1.5359,
|
| 33081 |
+
"step": 9408
|
| 33082 |
+
},
|
| 33083 |
+
{
|
| 33084 |
+
"epoch": 0.4183895780534436,
|
| 33085 |
+
"grad_norm": 0.06955131143331528,
|
| 33086 |
+
"learning_rate": 0.000978111376024132,
|
| 33087 |
+
"loss": 1.5371,
|
| 33088 |
+
"step": 9410
|
| 33089 |
+
},
|
| 33090 |
+
{
|
| 33091 |
+
"epoch": 0.41847850251211594,
|
| 33092 |
+
"grad_norm": 0.07158604264259338,
|
| 33093 |
+
"learning_rate": 0.000978101040703751,
|
| 33094 |
+
"loss": 1.54,
|
| 33095 |
+
"step": 9412
|
| 33096 |
+
},
|
| 33097 |
+
{
|
| 33098 |
+
"epoch": 0.4185674269707883,
|
| 33099 |
+
"grad_norm": 0.07020861655473709,
|
| 33100 |
+
"learning_rate": 0.0009780907029985227,
|
| 33101 |
+
"loss": 1.5381,
|
| 33102 |
+
"step": 9414
|
| 33103 |
+
},
|
| 33104 |
+
{
|
| 33105 |
+
"epoch": 0.41865635142946067,
|
| 33106 |
+
"grad_norm": 0.07165636867284775,
|
| 33107 |
+
"learning_rate": 0.0009780803629084987,
|
| 33108 |
+
"loss": 1.5382,
|
| 33109 |
+
"step": 9416
|
| 33110 |
+
},
|
| 33111 |
+
{
|
| 33112 |
+
"epoch": 0.41874527588813304,
|
| 33113 |
+
"grad_norm": 0.07206343859434128,
|
| 33114 |
+
"learning_rate": 0.0009780700204337304,
|
| 33115 |
+
"loss": 1.5348,
|
| 33116 |
+
"step": 9418
|
| 33117 |
+
},
|
| 33118 |
+
{
|
| 33119 |
+
"epoch": 0.4188342003468054,
|
| 33120 |
+
"grad_norm": 0.06763043999671936,
|
| 33121 |
+
"learning_rate": 0.0009780596755742694,
|
| 33122 |
+
"loss": 1.5375,
|
| 33123 |
+
"step": 9420
|
| 33124 |
+
},
|
| 33125 |
+
{
|
| 33126 |
+
"epoch": 0.4189231248054777,
|
| 33127 |
+
"grad_norm": 0.07210266590118408,
|
| 33128 |
+
"learning_rate": 0.0009780493283301675,
|
| 33129 |
+
"loss": 1.5403,
|
| 33130 |
+
"step": 9422
|
| 33131 |
+
},
|
| 33132 |
+
{
|
| 33133 |
+
"epoch": 0.4190120492641501,
|
| 33134 |
+
"grad_norm": 0.07414006441831589,
|
| 33135 |
+
"learning_rate": 0.000978038978701476,
|
| 33136 |
+
"loss": 1.537,
|
| 33137 |
+
"step": 9424
|
| 33138 |
+
},
|
| 33139 |
+
{
|
| 33140 |
+
"epoch": 0.41910097372282246,
|
| 33141 |
+
"grad_norm": 0.07020355015993118,
|
| 33142 |
+
"learning_rate": 0.000978028626688247,
|
| 33143 |
+
"loss": 1.5321,
|
| 33144 |
+
"step": 9426
|
| 33145 |
+
},
|
| 33146 |
+
{
|
| 33147 |
+
"epoch": 0.4191898981814948,
|
| 33148 |
+
"grad_norm": 0.07178431004285812,
|
| 33149 |
+
"learning_rate": 0.0009780182722905317,
|
| 33150 |
+
"loss": 1.5328,
|
| 33151 |
+
"step": 9428
|
| 33152 |
+
},
|
| 33153 |
+
{
|
| 33154 |
+
"epoch": 0.4192788226401672,
|
| 33155 |
+
"grad_norm": 0.07044417411088943,
|
| 33156 |
+
"learning_rate": 0.0009780079155083821,
|
| 33157 |
+
"loss": 1.5356,
|
| 33158 |
+
"step": 9430
|
| 33159 |
+
},
|
| 33160 |
+
{
|
| 33161 |
+
"epoch": 0.41936774709883956,
|
| 33162 |
+
"grad_norm": 0.07170826941728592,
|
| 33163 |
+
"learning_rate": 0.0009779975563418495,
|
| 33164 |
+
"loss": 1.5315,
|
| 33165 |
+
"step": 9432
|
| 33166 |
+
},
|
| 33167 |
+
{
|
| 33168 |
+
"epoch": 0.4194566715575119,
|
| 33169 |
+
"grad_norm": 0.07005254924297333,
|
| 33170 |
+
"learning_rate": 0.0009779871947909857,
|
| 33171 |
+
"loss": 1.5367,
|
| 33172 |
+
"step": 9434
|
| 33173 |
+
},
|
| 33174 |
+
{
|
| 33175 |
+
"epoch": 0.41954559601618424,
|
| 33176 |
+
"grad_norm": 0.06897489726543427,
|
| 33177 |
+
"learning_rate": 0.0009779768308558427,
|
| 33178 |
+
"loss": 1.5291,
|
| 33179 |
+
"step": 9436
|
| 33180 |
+
},
|
| 33181 |
+
{
|
| 33182 |
+
"epoch": 0.4196345204748566,
|
| 33183 |
+
"grad_norm": 0.07095732539892197,
|
| 33184 |
+
"learning_rate": 0.0009779664645364716,
|
| 33185 |
+
"loss": 1.5301,
|
| 33186 |
+
"step": 9438
|
| 33187 |
+
},
|
| 33188 |
+
{
|
| 33189 |
+
"epoch": 0.419723444933529,
|
| 33190 |
+
"grad_norm": 0.07314879447221756,
|
| 33191 |
+
"learning_rate": 0.0009779560958329246,
|
| 33192 |
+
"loss": 1.5323,
|
| 33193 |
+
"step": 9440
|
| 33194 |
+
},
|
| 33195 |
+
{
|
| 33196 |
+
"epoch": 0.41981236939220135,
|
| 33197 |
+
"grad_norm": 0.07073177397251129,
|
| 33198 |
+
"learning_rate": 0.0009779457247452532,
|
| 33199 |
+
"loss": 1.5375,
|
| 33200 |
+
"step": 9442
|
| 33201 |
+
},
|
| 33202 |
+
{
|
| 33203 |
+
"epoch": 0.41990129385087366,
|
| 33204 |
+
"grad_norm": 0.0689346119761467,
|
| 33205 |
+
"learning_rate": 0.0009779353512735093,
|
| 33206 |
+
"loss": 1.5345,
|
| 33207 |
+
"step": 9444
|
| 33208 |
+
},
|
| 33209 |
+
{
|
| 33210 |
+
"epoch": 0.419990218309546,
|
| 33211 |
+
"grad_norm": 0.06825485080480576,
|
| 33212 |
+
"learning_rate": 0.0009779249754177444,
|
| 33213 |
+
"loss": 1.5362,
|
| 33214 |
+
"step": 9446
|
| 33215 |
+
},
|
| 33216 |
+
{
|
| 33217 |
+
"epoch": 0.4200791427682184,
|
| 33218 |
+
"grad_norm": 0.07014141231775284,
|
| 33219 |
+
"learning_rate": 0.0009779145971780103,
|
| 33220 |
+
"loss": 1.5383,
|
| 33221 |
+
"step": 9448
|
| 33222 |
+
},
|
| 33223 |
+
{
|
| 33224 |
+
"epoch": 0.42016806722689076,
|
| 33225 |
+
"grad_norm": 0.06948299705982208,
|
| 33226 |
+
"learning_rate": 0.0009779042165543592,
|
| 33227 |
+
"loss": 1.5405,
|
| 33228 |
+
"step": 9450
|
| 33229 |
+
},
|
| 33230 |
+
{
|
| 33231 |
+
"epoch": 0.42025699168556313,
|
| 33232 |
+
"grad_norm": 0.06821645051240921,
|
| 33233 |
+
"learning_rate": 0.0009778938335468423,
|
| 33234 |
+
"loss": 1.5361,
|
| 33235 |
+
"step": 9452
|
| 33236 |
+
},
|
| 33237 |
+
{
|
| 33238 |
+
"epoch": 0.4203459161442355,
|
| 33239 |
+
"grad_norm": 0.0685497298836708,
|
| 33240 |
+
"learning_rate": 0.0009778834481555118,
|
| 33241 |
+
"loss": 1.536,
|
| 33242 |
+
"step": 9454
|
| 33243 |
+
},
|
| 33244 |
+
{
|
| 33245 |
+
"epoch": 0.4204348406029078,
|
| 33246 |
+
"grad_norm": 0.07217854261398315,
|
| 33247 |
+
"learning_rate": 0.0009778730603804192,
|
| 33248 |
+
"loss": 1.5344,
|
| 33249 |
+
"step": 9456
|
| 33250 |
+
},
|
| 33251 |
+
{
|
| 33252 |
+
"epoch": 0.4205237650615802,
|
| 33253 |
+
"grad_norm": 0.06981514394283295,
|
| 33254 |
+
"learning_rate": 0.0009778626702216164,
|
| 33255 |
+
"loss": 1.5345,
|
| 33256 |
+
"step": 9458
|
| 33257 |
+
},
|
| 33258 |
+
{
|
| 33259 |
+
"epoch": 0.42061268952025255,
|
| 33260 |
+
"grad_norm": 0.07042837888002396,
|
| 33261 |
+
"learning_rate": 0.0009778522776791553,
|
| 33262 |
+
"loss": 1.5362,
|
| 33263 |
+
"step": 9460
|
| 33264 |
+
},
|
| 33265 |
+
{
|
| 33266 |
+
"epoch": 0.4207016139789249,
|
| 33267 |
+
"grad_norm": 0.07101742923259735,
|
| 33268 |
+
"learning_rate": 0.0009778418827530878,
|
| 33269 |
+
"loss": 1.5426,
|
| 33270 |
+
"step": 9462
|
| 33271 |
+
},
|
| 33272 |
+
{
|
| 33273 |
+
"epoch": 0.4207905384375973,
|
| 33274 |
+
"grad_norm": 0.06968886405229568,
|
| 33275 |
+
"learning_rate": 0.0009778314854434656,
|
| 33276 |
+
"loss": 1.5348,
|
| 33277 |
+
"step": 9464
|
| 33278 |
+
},
|
| 33279 |
+
{
|
| 33280 |
+
"epoch": 0.4208794628962696,
|
| 33281 |
+
"grad_norm": 0.06759744137525558,
|
| 33282 |
+
"learning_rate": 0.0009778210857503407,
|
| 33283 |
+
"loss": 1.5297,
|
| 33284 |
+
"step": 9466
|
| 33285 |
+
},
|
| 33286 |
+
{
|
| 33287 |
+
"epoch": 0.42096838735494196,
|
| 33288 |
+
"grad_norm": 0.0696602612733841,
|
| 33289 |
+
"learning_rate": 0.0009778106836737647,
|
| 33290 |
+
"loss": 1.5383,
|
| 33291 |
+
"step": 9468
|
| 33292 |
+
},
|
| 33293 |
+
{
|
| 33294 |
+
"epoch": 0.42105731181361433,
|
| 33295 |
+
"grad_norm": 0.06992914527654648,
|
| 33296 |
+
"learning_rate": 0.00097780027921379,
|
| 33297 |
+
"loss": 1.5319,
|
| 33298 |
+
"step": 9470
|
| 33299 |
+
},
|
| 33300 |
+
{
|
| 33301 |
+
"epoch": 0.4211462362722867,
|
| 33302 |
+
"grad_norm": 0.07088860124349594,
|
| 33303 |
+
"learning_rate": 0.0009777898723704681,
|
| 33304 |
+
"loss": 1.54,
|
| 33305 |
+
"step": 9472
|
| 33306 |
+
},
|
| 33307 |
+
{
|
| 33308 |
+
"epoch": 0.42123516073095907,
|
| 33309 |
+
"grad_norm": 0.0724070593714714,
|
| 33310 |
+
"learning_rate": 0.000977779463143851,
|
| 33311 |
+
"loss": 1.5378,
|
| 33312 |
+
"step": 9474
|
| 33313 |
+
},
|
| 33314 |
+
{
|
| 33315 |
+
"epoch": 0.4213240851896314,
|
| 33316 |
+
"grad_norm": 0.07188228517770767,
|
| 33317 |
+
"learning_rate": 0.0009777690515339905,
|
| 33318 |
+
"loss": 1.5392,
|
| 33319 |
+
"step": 9476
|
| 33320 |
+
},
|
| 33321 |
+
{
|
| 33322 |
+
"epoch": 0.42141300964830375,
|
| 33323 |
+
"grad_norm": 0.06922028958797455,
|
| 33324 |
+
"learning_rate": 0.0009777586375409389,
|
| 33325 |
+
"loss": 1.527,
|
| 33326 |
+
"step": 9478
|
| 33327 |
+
},
|
| 33328 |
+
{
|
| 33329 |
+
"epoch": 0.4215019341069761,
|
| 33330 |
+
"grad_norm": 0.07125406712293625,
|
| 33331 |
+
"learning_rate": 0.0009777482211647476,
|
| 33332 |
+
"loss": 1.5363,
|
| 33333 |
+
"step": 9480
|
| 33334 |
+
},
|
| 33335 |
+
{
|
| 33336 |
+
"epoch": 0.4215908585656485,
|
| 33337 |
+
"grad_norm": 0.06816605478525162,
|
| 33338 |
+
"learning_rate": 0.0009777378024054693,
|
| 33339 |
+
"loss": 1.5298,
|
| 33340 |
+
"step": 9482
|
| 33341 |
+
},
|
| 33342 |
+
{
|
| 33343 |
+
"epoch": 0.42167978302432085,
|
| 33344 |
+
"grad_norm": 0.06775356084108353,
|
| 33345 |
+
"learning_rate": 0.0009777273812631552,
|
| 33346 |
+
"loss": 1.5418,
|
| 33347 |
+
"step": 9484
|
| 33348 |
+
},
|
| 33349 |
+
{
|
| 33350 |
+
"epoch": 0.4217687074829932,
|
| 33351 |
+
"grad_norm": 0.07036928087472916,
|
| 33352 |
+
"learning_rate": 0.0009777169577378578,
|
| 33353 |
+
"loss": 1.537,
|
| 33354 |
+
"step": 9486
|
| 33355 |
+
},
|
| 33356 |
+
{
|
| 33357 |
+
"epoch": 0.42185763194166553,
|
| 33358 |
+
"grad_norm": 0.07150757312774658,
|
| 33359 |
+
"learning_rate": 0.0009777065318296288,
|
| 33360 |
+
"loss": 1.5352,
|
| 33361 |
+
"step": 9488
|
| 33362 |
+
},
|
| 33363 |
+
{
|
| 33364 |
+
"epoch": 0.4219465564003379,
|
| 33365 |
+
"grad_norm": 0.06876358389854431,
|
| 33366 |
+
"learning_rate": 0.0009776961035385203,
|
| 33367 |
+
"loss": 1.5388,
|
| 33368 |
+
"step": 9490
|
| 33369 |
+
},
|
| 33370 |
+
{
|
| 33371 |
+
"epoch": 0.42203548085901027,
|
| 33372 |
+
"grad_norm": 0.07003464549779892,
|
| 33373 |
+
"learning_rate": 0.0009776856728645844,
|
| 33374 |
+
"loss": 1.537,
|
| 33375 |
+
"step": 9492
|
| 33376 |
+
},
|
| 33377 |
+
{
|
| 33378 |
+
"epoch": 0.42212440531768264,
|
| 33379 |
+
"grad_norm": 0.070342518389225,
|
| 33380 |
+
"learning_rate": 0.0009776752398078731,
|
| 33381 |
+
"loss": 1.5322,
|
| 33382 |
+
"step": 9494
|
| 33383 |
+
},
|
| 33384 |
+
{
|
| 33385 |
+
"epoch": 0.422213329776355,
|
| 33386 |
+
"grad_norm": 0.06741419434547424,
|
| 33387 |
+
"learning_rate": 0.0009776648043684384,
|
| 33388 |
+
"loss": 1.5398,
|
| 33389 |
+
"step": 9496
|
| 33390 |
+
},
|
| 33391 |
+
{
|
| 33392 |
+
"epoch": 0.4223022542350273,
|
| 33393 |
+
"grad_norm": 0.07096916437149048,
|
| 33394 |
+
"learning_rate": 0.0009776543665463325,
|
| 33395 |
+
"loss": 1.5313,
|
| 33396 |
+
"step": 9498
|
| 33397 |
+
},
|
| 33398 |
+
{
|
| 33399 |
+
"epoch": 0.4223911786936997,
|
| 33400 |
+
"grad_norm": 0.06838645040988922,
|
| 33401 |
+
"learning_rate": 0.000977643926341607,
|
| 33402 |
+
"loss": 1.5372,
|
| 33403 |
+
"step": 9500
|
| 33404 |
+
},
|
| 33405 |
+
{
|
| 33406 |
+
"epoch": 0.4223911786936997,
|
| 33407 |
+
"eval_loss": 1.5147486925125122,
|
| 33408 |
+
"eval_runtime": 12.692,
|
| 33409 |
+
"eval_samples_per_second": 544.436,
|
| 33410 |
+
"eval_steps_per_second": 68.074,
|
| 33411 |
+
"step": 9500
|
| 33412 |
+
},
|
| 33413 |
+
{
|
| 33414 |
+
"epoch": 0.42248010315237206,
|
| 33415 |
+
"grad_norm": 0.07152796536684036,
|
| 33416 |
+
"learning_rate": 0.0009776334837543147,
|
| 33417 |
+
"loss": 1.5332,
|
| 33418 |
+
"step": 9502
|
| 33419 |
+
},
|
| 33420 |
+
{
|
| 33421 |
+
"epoch": 0.4225690276110444,
|
| 33422 |
+
"grad_norm": 0.0700407326221466,
|
| 33423 |
+
"learning_rate": 0.000977623038784507,
|
| 33424 |
+
"loss": 1.5358,
|
| 33425 |
+
"step": 9504
|
| 33426 |
+
},
|
| 33427 |
+
{
|
| 33428 |
+
"epoch": 0.4226579520697168,
|
| 33429 |
+
"grad_norm": 0.06791914999485016,
|
| 33430 |
+
"learning_rate": 0.0009776125914322364,
|
| 33431 |
+
"loss": 1.5323,
|
| 33432 |
+
"step": 9506
|
| 33433 |
+
},
|
| 33434 |
+
{
|
| 33435 |
+
"epoch": 0.42274687652838916,
|
| 33436 |
+
"grad_norm": 0.07237772643566132,
|
| 33437 |
+
"learning_rate": 0.0009776021416975549,
|
| 33438 |
+
"loss": 1.5339,
|
| 33439 |
+
"step": 9508
|
| 33440 |
+
},
|
| 33441 |
+
{
|
| 33442 |
+
"epoch": 0.42283580098706147,
|
| 33443 |
+
"grad_norm": 0.07078398764133453,
|
| 33444 |
+
"learning_rate": 0.0009775916895805145,
|
| 33445 |
+
"loss": 1.5327,
|
| 33446 |
+
"step": 9510
|
| 33447 |
+
},
|
| 33448 |
+
{
|
| 33449 |
+
"epoch": 0.42292472544573384,
|
| 33450 |
+
"grad_norm": 0.07085014879703522,
|
| 33451 |
+
"learning_rate": 0.0009775812350811678,
|
| 33452 |
+
"loss": 1.5297,
|
| 33453 |
+
"step": 9512
|
| 33454 |
+
},
|
| 33455 |
+
{
|
| 33456 |
+
"epoch": 0.4230136499044062,
|
| 33457 |
+
"grad_norm": 0.07237362116575241,
|
| 33458 |
+
"learning_rate": 0.0009775707781995665,
|
| 33459 |
+
"loss": 1.5296,
|
| 33460 |
+
"step": 9514
|
| 33461 |
+
},
|
| 33462 |
+
{
|
| 33463 |
+
"epoch": 0.4231025743630786,
|
| 33464 |
+
"grad_norm": 0.06783927977085114,
|
| 33465 |
+
"learning_rate": 0.0009775603189357627,
|
| 33466 |
+
"loss": 1.5367,
|
| 33467 |
+
"step": 9516
|
| 33468 |
+
},
|
| 33469 |
+
{
|
| 33470 |
+
"epoch": 0.42319149882175094,
|
| 33471 |
+
"grad_norm": 0.06836774200201035,
|
| 33472 |
+
"learning_rate": 0.0009775498572898089,
|
| 33473 |
+
"loss": 1.5336,
|
| 33474 |
+
"step": 9518
|
| 33475 |
+
},
|
| 33476 |
+
{
|
| 33477 |
+
"epoch": 0.42328042328042326,
|
| 33478 |
+
"grad_norm": 0.0725974515080452,
|
| 33479 |
+
"learning_rate": 0.0009775393932617573,
|
| 33480 |
+
"loss": 1.5319,
|
| 33481 |
+
"step": 9520
|
| 33482 |
+
},
|
| 33483 |
+
{
|
| 33484 |
+
"epoch": 0.4233693477390956,
|
| 33485 |
+
"grad_norm": 0.07181134074926376,
|
| 33486 |
+
"learning_rate": 0.0009775289268516597,
|
| 33487 |
+
"loss": 1.5352,
|
| 33488 |
+
"step": 9522
|
| 33489 |
+
},
|
| 33490 |
+
{
|
| 33491 |
+
"epoch": 0.423458272197768,
|
| 33492 |
+
"grad_norm": 0.07021788507699966,
|
| 33493 |
+
"learning_rate": 0.0009775184580595687,
|
| 33494 |
+
"loss": 1.5393,
|
| 33495 |
+
"step": 9524
|
| 33496 |
+
},
|
| 33497 |
+
{
|
| 33498 |
+
"epoch": 0.42354719665644036,
|
| 33499 |
+
"grad_norm": 0.06836768984794617,
|
| 33500 |
+
"learning_rate": 0.0009775079868855363,
|
| 33501 |
+
"loss": 1.5354,
|
| 33502 |
+
"step": 9526
|
| 33503 |
+
},
|
| 33504 |
+
{
|
| 33505 |
+
"epoch": 0.42363612111511273,
|
| 33506 |
+
"grad_norm": 0.06834634393453598,
|
| 33507 |
+
"learning_rate": 0.0009774975133296148,
|
| 33508 |
+
"loss": 1.5393,
|
| 33509 |
+
"step": 9528
|
| 33510 |
+
},
|
| 33511 |
+
{
|
| 33512 |
+
"epoch": 0.42372504557378504,
|
| 33513 |
+
"grad_norm": 0.0703490823507309,
|
| 33514 |
+
"learning_rate": 0.0009774870373918565,
|
| 33515 |
+
"loss": 1.5319,
|
| 33516 |
+
"step": 9530
|
| 33517 |
+
},
|
| 33518 |
+
{
|
| 33519 |
+
"epoch": 0.4238139700324574,
|
| 33520 |
+
"grad_norm": 0.06841664761304855,
|
| 33521 |
+
"learning_rate": 0.0009774765590723133,
|
| 33522 |
+
"loss": 1.5413,
|
| 33523 |
+
"step": 9532
|
| 33524 |
+
},
|
| 33525 |
+
{
|
| 33526 |
+
"epoch": 0.4239028944911298,
|
| 33527 |
+
"grad_norm": 0.06665299087762833,
|
| 33528 |
+
"learning_rate": 0.0009774660783710381,
|
| 33529 |
+
"loss": 1.5359,
|
| 33530 |
+
"step": 9534
|
| 33531 |
+
},
|
| 33532 |
+
{
|
| 33533 |
+
"epoch": 0.42399181894980215,
|
| 33534 |
+
"grad_norm": 0.06913217157125473,
|
| 33535 |
+
"learning_rate": 0.0009774555952880828,
|
| 33536 |
+
"loss": 1.5409,
|
| 33537 |
+
"step": 9536
|
| 33538 |
+
},
|
| 33539 |
+
{
|
| 33540 |
+
"epoch": 0.4240807434084745,
|
| 33541 |
+
"grad_norm": 0.06786402314901352,
|
| 33542 |
+
"learning_rate": 0.0009774451098234999,
|
| 33543 |
+
"loss": 1.5291,
|
| 33544 |
+
"step": 9538
|
| 33545 |
+
},
|
| 33546 |
+
{
|
| 33547 |
+
"epoch": 0.4241696678671469,
|
| 33548 |
+
"grad_norm": 0.06822808086872101,
|
| 33549 |
+
"learning_rate": 0.000977434621977341,
|
| 33550 |
+
"loss": 1.5398,
|
| 33551 |
+
"step": 9540
|
| 33552 |
+
},
|
| 33553 |
+
{
|
| 33554 |
+
"epoch": 0.4242585923258192,
|
| 33555 |
+
"grad_norm": 0.06818651407957077,
|
| 33556 |
+
"learning_rate": 0.0009774241317496593,
|
| 33557 |
+
"loss": 1.5413,
|
| 33558 |
+
"step": 9542
|
| 33559 |
+
},
|
| 33560 |
+
{
|
| 33561 |
+
"epoch": 0.42434751678449156,
|
| 33562 |
+
"grad_norm": 0.06693682074546814,
|
| 33563 |
+
"learning_rate": 0.000977413639140507,
|
| 33564 |
+
"loss": 1.5302,
|
| 33565 |
+
"step": 9544
|
| 33566 |
+
},
|
| 33567 |
+
{
|
| 33568 |
+
"epoch": 0.42443644124316393,
|
| 33569 |
+
"grad_norm": 0.06880820542573929,
|
| 33570 |
+
"learning_rate": 0.0009774031441499359,
|
| 33571 |
+
"loss": 1.5323,
|
| 33572 |
+
"step": 9546
|
| 33573 |
+
},
|
| 33574 |
+
{
|
| 33575 |
+
"epoch": 0.4245253657018363,
|
| 33576 |
+
"grad_norm": 0.0655490830540657,
|
| 33577 |
+
"learning_rate": 0.0009773926467779987,
|
| 33578 |
+
"loss": 1.5412,
|
| 33579 |
+
"step": 9548
|
| 33580 |
+
},
|
| 33581 |
+
{
|
| 33582 |
+
"epoch": 0.42461429016050867,
|
| 33583 |
+
"grad_norm": 0.06712329387664795,
|
| 33584 |
+
"learning_rate": 0.0009773821470247478,
|
| 33585 |
+
"loss": 1.5346,
|
| 33586 |
+
"step": 9550
|
| 33587 |
+
},
|
| 33588 |
+
{
|
| 33589 |
+
"epoch": 0.424703214619181,
|
| 33590 |
+
"grad_norm": 0.06615650653839111,
|
| 33591 |
+
"learning_rate": 0.0009773716448902355,
|
| 33592 |
+
"loss": 1.5341,
|
| 33593 |
+
"step": 9552
|
| 33594 |
+
},
|
| 33595 |
+
{
|
| 33596 |
+
"epoch": 0.42479213907785335,
|
| 33597 |
+
"grad_norm": 0.06936550885438919,
|
| 33598 |
+
"learning_rate": 0.0009773611403745143,
|
| 33599 |
+
"loss": 1.5341,
|
| 33600 |
+
"step": 9554
|
| 33601 |
+
},
|
| 33602 |
+
{
|
| 33603 |
+
"epoch": 0.4248810635365257,
|
| 33604 |
+
"grad_norm": 0.06751050800085068,
|
| 33605 |
+
"learning_rate": 0.0009773506334776363,
|
| 33606 |
+
"loss": 1.5379,
|
| 33607 |
+
"step": 9556
|
| 33608 |
+
},
|
| 33609 |
+
{
|
| 33610 |
+
"epoch": 0.4249699879951981,
|
| 33611 |
+
"grad_norm": 0.0678553357720375,
|
| 33612 |
+
"learning_rate": 0.0009773401241996542,
|
| 33613 |
+
"loss": 1.5381,
|
| 33614 |
+
"step": 9558
|
| 33615 |
+
},
|
| 33616 |
+
{
|
| 33617 |
+
"epoch": 0.42505891245387045,
|
| 33618 |
+
"grad_norm": 0.06892528384923935,
|
| 33619 |
+
"learning_rate": 0.0009773296125406203,
|
| 33620 |
+
"loss": 1.5373,
|
| 33621 |
+
"step": 9560
|
| 33622 |
+
},
|
| 33623 |
+
{
|
| 33624 |
+
"epoch": 0.4251478369125428,
|
| 33625 |
+
"grad_norm": 0.06836491823196411,
|
| 33626 |
+
"learning_rate": 0.0009773190985005872,
|
| 33627 |
+
"loss": 1.5374,
|
| 33628 |
+
"step": 9562
|
| 33629 |
+
},
|
| 33630 |
+
{
|
| 33631 |
+
"epoch": 0.42523676137121513,
|
| 33632 |
+
"grad_norm": 0.07044228166341782,
|
| 33633 |
+
"learning_rate": 0.000977308582079607,
|
| 33634 |
+
"loss": 1.5313,
|
| 33635 |
+
"step": 9564
|
| 33636 |
+
},
|
| 33637 |
+
{
|
| 33638 |
+
"epoch": 0.4253256858298875,
|
| 33639 |
+
"grad_norm": 0.06915424019098282,
|
| 33640 |
+
"learning_rate": 0.0009772980632777324,
|
| 33641 |
+
"loss": 1.5336,
|
| 33642 |
+
"step": 9566
|
| 33643 |
+
},
|
| 33644 |
+
{
|
| 33645 |
+
"epoch": 0.42541461028855987,
|
| 33646 |
+
"grad_norm": 0.0704522505402565,
|
| 33647 |
+
"learning_rate": 0.0009772875420950159,
|
| 33648 |
+
"loss": 1.5408,
|
| 33649 |
+
"step": 9568
|
| 33650 |
+
},
|
| 33651 |
+
{
|
| 33652 |
+
"epoch": 0.42550353474723224,
|
| 33653 |
+
"grad_norm": 0.06899117678403854,
|
| 33654 |
+
"learning_rate": 0.0009772770185315098,
|
| 33655 |
+
"loss": 1.538,
|
| 33656 |
+
"step": 9570
|
| 33657 |
+
},
|
| 33658 |
+
{
|
| 33659 |
+
"epoch": 0.4255924592059046,
|
| 33660 |
+
"grad_norm": 0.06853268295526505,
|
| 33661 |
+
"learning_rate": 0.000977266492587267,
|
| 33662 |
+
"loss": 1.5374,
|
| 33663 |
+
"step": 9572
|
| 33664 |
+
},
|
| 33665 |
+
{
|
| 33666 |
+
"epoch": 0.4256813836645769,
|
| 33667 |
+
"grad_norm": 0.07276134192943573,
|
| 33668 |
+
"learning_rate": 0.0009772559642623395,
|
| 33669 |
+
"loss": 1.5343,
|
| 33670 |
+
"step": 9574
|
| 33671 |
+
},
|
| 33672 |
+
{
|
| 33673 |
+
"epoch": 0.4257703081232493,
|
| 33674 |
+
"grad_norm": 0.06734218448400497,
|
| 33675 |
+
"learning_rate": 0.00097724543355678,
|
| 33676 |
+
"loss": 1.5361,
|
| 33677 |
+
"step": 9576
|
| 33678 |
+
},
|
| 33679 |
+
{
|
| 33680 |
+
"epoch": 0.42585923258192165,
|
| 33681 |
+
"grad_norm": 0.07109152525663376,
|
| 33682 |
+
"learning_rate": 0.0009772349004706412,
|
| 33683 |
+
"loss": 1.5397,
|
| 33684 |
+
"step": 9578
|
| 33685 |
+
},
|
| 33686 |
+
{
|
| 33687 |
+
"epoch": 0.425948157040594,
|
| 33688 |
+
"grad_norm": 0.06941290199756622,
|
| 33689 |
+
"learning_rate": 0.0009772243650039755,
|
| 33690 |
+
"loss": 1.5369,
|
| 33691 |
+
"step": 9580
|
| 33692 |
+
},
|
| 33693 |
+
{
|
| 33694 |
+
"epoch": 0.4260370814992664,
|
| 33695 |
+
"grad_norm": 0.07232563197612762,
|
| 33696 |
+
"learning_rate": 0.0009772138271568352,
|
| 33697 |
+
"loss": 1.5375,
|
| 33698 |
+
"step": 9582
|
| 33699 |
+
},
|
| 33700 |
+
{
|
| 33701 |
+
"epoch": 0.42612600595793876,
|
| 33702 |
+
"grad_norm": 0.07067085057497025,
|
| 33703 |
+
"learning_rate": 0.0009772032869292734,
|
| 33704 |
+
"loss": 1.5339,
|
| 33705 |
+
"step": 9584
|
| 33706 |
+
},
|
| 33707 |
+
{
|
| 33708 |
+
"epoch": 0.42621493041661107,
|
| 33709 |
+
"grad_norm": 0.06817600876092911,
|
| 33710 |
+
"learning_rate": 0.0009771927443213422,
|
| 33711 |
+
"loss": 1.5303,
|
| 33712 |
+
"step": 9586
|
| 33713 |
+
},
|
| 33714 |
+
{
|
| 33715 |
+
"epoch": 0.42630385487528344,
|
| 33716 |
+
"grad_norm": 0.06987394392490387,
|
| 33717 |
+
"learning_rate": 0.0009771821993330944,
|
| 33718 |
+
"loss": 1.5308,
|
| 33719 |
+
"step": 9588
|
| 33720 |
+
},
|
| 33721 |
+
{
|
| 33722 |
+
"epoch": 0.4263927793339558,
|
| 33723 |
+
"grad_norm": 0.06813080608844757,
|
| 33724 |
+
"learning_rate": 0.0009771716519645826,
|
| 33725 |
+
"loss": 1.5327,
|
| 33726 |
+
"step": 9590
|
| 33727 |
+
},
|
| 33728 |
+
{
|
| 33729 |
+
"epoch": 0.4264817037926282,
|
| 33730 |
+
"grad_norm": 0.06885334104299545,
|
| 33731 |
+
"learning_rate": 0.0009771611022158593,
|
| 33732 |
+
"loss": 1.5369,
|
| 33733 |
+
"step": 9592
|
| 33734 |
+
},
|
| 33735 |
+
{
|
| 33736 |
+
"epoch": 0.42657062825130054,
|
| 33737 |
+
"grad_norm": 0.06869365274906158,
|
| 33738 |
+
"learning_rate": 0.0009771505500869775,
|
| 33739 |
+
"loss": 1.541,
|
| 33740 |
+
"step": 9594
|
| 33741 |
+
},
|
| 33742 |
+
{
|
| 33743 |
+
"epoch": 0.42665955270997286,
|
| 33744 |
+
"grad_norm": 0.06896496564149857,
|
| 33745 |
+
"learning_rate": 0.0009771399955779893,
|
| 33746 |
+
"loss": 1.5353,
|
| 33747 |
+
"step": 9596
|
| 33748 |
+
},
|
| 33749 |
+
{
|
| 33750 |
+
"epoch": 0.4267484771686452,
|
| 33751 |
+
"grad_norm": 0.07151202112436295,
|
| 33752 |
+
"learning_rate": 0.0009771294386889478,
|
| 33753 |
+
"loss": 1.5341,
|
| 33754 |
+
"step": 9598
|
| 33755 |
+
},
|
| 33756 |
+
{
|
| 33757 |
+
"epoch": 0.4268374016273176,
|
| 33758 |
+
"grad_norm": 0.06725707650184631,
|
| 33759 |
+
"learning_rate": 0.0009771188794199053,
|
| 33760 |
+
"loss": 1.538,
|
| 33761 |
+
"step": 9600
|
| 33762 |
+
},
|
| 33763 |
+
{
|
| 33764 |
+
"epoch": 0.42692632608598996,
|
| 33765 |
+
"grad_norm": 0.07007326185703278,
|
| 33766 |
+
"learning_rate": 0.0009771083177709146,
|
| 33767 |
+
"loss": 1.5396,
|
| 33768 |
+
"step": 9602
|
| 33769 |
+
},
|
| 33770 |
+
{
|
| 33771 |
+
"epoch": 0.42701525054466233,
|
| 33772 |
+
"grad_norm": 0.06781750172376633,
|
| 33773 |
+
"learning_rate": 0.0009770977537420288,
|
| 33774 |
+
"loss": 1.5337,
|
| 33775 |
+
"step": 9604
|
| 33776 |
+
},
|
| 33777 |
+
{
|
| 33778 |
+
"epoch": 0.42710417500333464,
|
| 33779 |
+
"grad_norm": 0.07008969038724899,
|
| 33780 |
+
"learning_rate": 0.0009770871873332997,
|
| 33781 |
+
"loss": 1.5296,
|
| 33782 |
+
"step": 9606
|
| 33783 |
+
},
|
| 33784 |
+
{
|
| 33785 |
+
"epoch": 0.427193099462007,
|
| 33786 |
+
"grad_norm": 0.07271461933851242,
|
| 33787 |
+
"learning_rate": 0.0009770766185447808,
|
| 33788 |
+
"loss": 1.5362,
|
| 33789 |
+
"step": 9608
|
| 33790 |
+
},
|
| 33791 |
+
{
|
| 33792 |
+
"epoch": 0.4272820239206794,
|
| 33793 |
+
"grad_norm": 0.07218462973833084,
|
| 33794 |
+
"learning_rate": 0.0009770660473765245,
|
| 33795 |
+
"loss": 1.5313,
|
| 33796 |
+
"step": 9610
|
| 33797 |
+
},
|
| 33798 |
+
{
|
| 33799 |
+
"epoch": 0.42737094837935174,
|
| 33800 |
+
"grad_norm": 0.0696045383810997,
|
| 33801 |
+
"learning_rate": 0.0009770554738285835,
|
| 33802 |
+
"loss": 1.535,
|
| 33803 |
+
"step": 9612
|
| 33804 |
+
},
|
| 33805 |
+
{
|
| 33806 |
+
"epoch": 0.4274598728380241,
|
| 33807 |
+
"grad_norm": 0.0716555267572403,
|
| 33808 |
+
"learning_rate": 0.0009770448979010105,
|
| 33809 |
+
"loss": 1.5347,
|
| 33810 |
+
"step": 9614
|
| 33811 |
+
},
|
| 33812 |
+
{
|
| 33813 |
+
"epoch": 0.4275487972966965,
|
| 33814 |
+
"grad_norm": 0.06784503161907196,
|
| 33815 |
+
"learning_rate": 0.0009770343195938586,
|
| 33816 |
+
"loss": 1.5328,
|
| 33817 |
+
"step": 9616
|
| 33818 |
+
},
|
| 33819 |
+
{
|
| 33820 |
+
"epoch": 0.4276377217553688,
|
| 33821 |
+
"grad_norm": 0.0706615075469017,
|
| 33822 |
+
"learning_rate": 0.0009770237389071803,
|
| 33823 |
+
"loss": 1.5365,
|
| 33824 |
+
"step": 9618
|
| 33825 |
+
},
|
| 33826 |
+
{
|
| 33827 |
+
"epoch": 0.42772664621404116,
|
| 33828 |
+
"grad_norm": 0.07164894044399261,
|
| 33829 |
+
"learning_rate": 0.0009770131558410283,
|
| 33830 |
+
"loss": 1.5398,
|
| 33831 |
+
"step": 9620
|
| 33832 |
+
},
|
| 33833 |
+
{
|
| 33834 |
+
"epoch": 0.42781557067271353,
|
| 33835 |
+
"grad_norm": 0.07363253831863403,
|
| 33836 |
+
"learning_rate": 0.0009770025703954555,
|
| 33837 |
+
"loss": 1.5344,
|
| 33838 |
+
"step": 9622
|
| 33839 |
+
},
|
| 33840 |
+
{
|
| 33841 |
+
"epoch": 0.4279044951313859,
|
| 33842 |
+
"grad_norm": 0.07175491750240326,
|
| 33843 |
+
"learning_rate": 0.0009769919825705147,
|
| 33844 |
+
"loss": 1.5304,
|
| 33845 |
+
"step": 9624
|
| 33846 |
+
},
|
| 33847 |
+
{
|
| 33848 |
+
"epoch": 0.42799341959005827,
|
| 33849 |
+
"grad_norm": 0.07303240895271301,
|
| 33850 |
+
"learning_rate": 0.0009769813923662589,
|
| 33851 |
+
"loss": 1.5402,
|
| 33852 |
+
"step": 9626
|
| 33853 |
+
},
|
| 33854 |
+
{
|
| 33855 |
+
"epoch": 0.4280823440487306,
|
| 33856 |
+
"grad_norm": 0.06884127110242844,
|
| 33857 |
+
"learning_rate": 0.0009769707997827404,
|
| 33858 |
+
"loss": 1.5313,
|
| 33859 |
+
"step": 9628
|
| 33860 |
+
},
|
| 33861 |
+
{
|
| 33862 |
+
"epoch": 0.42817126850740295,
|
| 33863 |
+
"grad_norm": 0.06890305131673813,
|
| 33864 |
+
"learning_rate": 0.0009769602048200128,
|
| 33865 |
+
"loss": 1.5369,
|
| 33866 |
+
"step": 9630
|
| 33867 |
+
},
|
| 33868 |
+
{
|
| 33869 |
+
"epoch": 0.4282601929660753,
|
| 33870 |
+
"grad_norm": 0.07173550128936768,
|
| 33871 |
+
"learning_rate": 0.0009769496074781283,
|
| 33872 |
+
"loss": 1.5283,
|
| 33873 |
+
"step": 9632
|
| 33874 |
+
},
|
| 33875 |
+
{
|
| 33876 |
+
"epoch": 0.4283491174247477,
|
| 33877 |
+
"grad_norm": 0.068869449198246,
|
| 33878 |
+
"learning_rate": 0.0009769390077571398,
|
| 33879 |
+
"loss": 1.5363,
|
| 33880 |
+
"step": 9634
|
| 33881 |
+
},
|
| 33882 |
+
{
|
| 33883 |
+
"epoch": 0.42843804188342005,
|
| 33884 |
+
"grad_norm": 0.0685199499130249,
|
| 33885 |
+
"learning_rate": 0.0009769284056571005,
|
| 33886 |
+
"loss": 1.5358,
|
| 33887 |
+
"step": 9636
|
| 33888 |
+
},
|
| 33889 |
+
{
|
| 33890 |
+
"epoch": 0.4285269663420924,
|
| 33891 |
+
"grad_norm": 0.06906317174434662,
|
| 33892 |
+
"learning_rate": 0.0009769178011780632,
|
| 33893 |
+
"loss": 1.5338,
|
| 33894 |
+
"step": 9638
|
| 33895 |
+
},
|
| 33896 |
+
{
|
| 33897 |
+
"epoch": 0.42861589080076473,
|
| 33898 |
+
"grad_norm": 0.06904531270265579,
|
| 33899 |
+
"learning_rate": 0.0009769071943200808,
|
| 33900 |
+
"loss": 1.5367,
|
| 33901 |
+
"step": 9640
|
| 33902 |
+
},
|
| 33903 |
+
{
|
| 33904 |
+
"epoch": 0.4287048152594371,
|
| 33905 |
+
"grad_norm": 0.0690319761633873,
|
| 33906 |
+
"learning_rate": 0.0009768965850832062,
|
| 33907 |
+
"loss": 1.5332,
|
| 33908 |
+
"step": 9642
|
| 33909 |
+
},
|
| 33910 |
+
{
|
| 33911 |
+
"epoch": 0.42879373971810947,
|
| 33912 |
+
"grad_norm": 0.07049769908189774,
|
| 33913 |
+
"learning_rate": 0.0009768859734674922,
|
| 33914 |
+
"loss": 1.5366,
|
| 33915 |
+
"step": 9644
|
| 33916 |
+
},
|
| 33917 |
+
{
|
| 33918 |
+
"epoch": 0.42888266417678184,
|
| 33919 |
+
"grad_norm": 0.07084295898675919,
|
| 33920 |
+
"learning_rate": 0.0009768753594729918,
|
| 33921 |
+
"loss": 1.536,
|
| 33922 |
+
"step": 9646
|
| 33923 |
+
},
|
| 33924 |
+
{
|
| 33925 |
+
"epoch": 0.4289715886354542,
|
| 33926 |
+
"grad_norm": 0.06893621385097504,
|
| 33927 |
+
"learning_rate": 0.0009768647430997578,
|
| 33928 |
+
"loss": 1.5285,
|
| 33929 |
+
"step": 9648
|
| 33930 |
+
},
|
| 33931 |
+
{
|
| 33932 |
+
"epoch": 0.4290605130941265,
|
| 33933 |
+
"grad_norm": 0.07064125686883926,
|
| 33934 |
+
"learning_rate": 0.0009768541243478435,
|
| 33935 |
+
"loss": 1.53,
|
| 33936 |
+
"step": 9650
|
| 33937 |
+
},
|
| 33938 |
+
{
|
| 33939 |
+
"epoch": 0.4291494375527989,
|
| 33940 |
+
"grad_norm": 0.07157714664936066,
|
| 33941 |
+
"learning_rate": 0.0009768435032173016,
|
| 33942 |
+
"loss": 1.5316,
|
| 33943 |
+
"step": 9652
|
| 33944 |
+
},
|
| 33945 |
+
{
|
| 33946 |
+
"epoch": 0.42923836201147125,
|
| 33947 |
+
"grad_norm": 0.07000470906496048,
|
| 33948 |
+
"learning_rate": 0.0009768328797081852,
|
| 33949 |
+
"loss": 1.5395,
|
| 33950 |
+
"step": 9654
|
| 33951 |
+
},
|
| 33952 |
+
{
|
| 33953 |
+
"epoch": 0.4293272864701436,
|
| 33954 |
+
"grad_norm": 0.06912733614444733,
|
| 33955 |
+
"learning_rate": 0.000976822253820547,
|
| 33956 |
+
"loss": 1.5349,
|
| 33957 |
+
"step": 9656
|
| 33958 |
+
},
|
| 33959 |
+
{
|
| 33960 |
+
"epoch": 0.429416210928816,
|
| 33961 |
+
"grad_norm": 0.06659382581710815,
|
| 33962 |
+
"learning_rate": 0.0009768116255544407,
|
| 33963 |
+
"loss": 1.5309,
|
| 33964 |
+
"step": 9658
|
| 33965 |
+
},
|
| 33966 |
+
{
|
| 33967 |
+
"epoch": 0.4295051353874883,
|
| 33968 |
+
"grad_norm": 0.06706222891807556,
|
| 33969 |
+
"learning_rate": 0.0009768009949099184,
|
| 33970 |
+
"loss": 1.5337,
|
| 33971 |
+
"step": 9660
|
| 33972 |
+
},
|
| 33973 |
+
{
|
| 33974 |
+
"epoch": 0.42959405984616067,
|
| 33975 |
+
"grad_norm": 0.07012798637151718,
|
| 33976 |
+
"learning_rate": 0.0009767903618870337,
|
| 33977 |
+
"loss": 1.5429,
|
| 33978 |
+
"step": 9662
|
| 33979 |
+
},
|
| 33980 |
+
{
|
| 33981 |
+
"epoch": 0.42968298430483304,
|
| 33982 |
+
"grad_norm": 0.06715415418148041,
|
| 33983 |
+
"learning_rate": 0.0009767797264858397,
|
| 33984 |
+
"loss": 1.5405,
|
| 33985 |
+
"step": 9664
|
| 33986 |
+
},
|
| 33987 |
+
{
|
| 33988 |
+
"epoch": 0.4297719087635054,
|
| 33989 |
+
"grad_norm": 0.0690038651227951,
|
| 33990 |
+
"learning_rate": 0.0009767690887063894,
|
| 33991 |
+
"loss": 1.5364,
|
| 33992 |
+
"step": 9666
|
| 33993 |
+
},
|
| 33994 |
+
{
|
| 33995 |
+
"epoch": 0.4298608332221778,
|
| 33996 |
+
"grad_norm": 0.06869196146726608,
|
| 33997 |
+
"learning_rate": 0.0009767584485487356,
|
| 33998 |
+
"loss": 1.5336,
|
| 33999 |
+
"step": 9668
|
| 34000 |
+
},
|
| 34001 |
+
{
|
| 34002 |
+
"epoch": 0.42994975768085014,
|
| 34003 |
+
"grad_norm": 0.07014153897762299,
|
| 34004 |
+
"learning_rate": 0.0009767478060129313,
|
| 34005 |
+
"loss": 1.5349,
|
| 34006 |
+
"step": 9670
|
| 34007 |
+
},
|
| 34008 |
+
{
|
| 34009 |
+
"epoch": 0.43003868213952245,
|
| 34010 |
+
"grad_norm": 0.06871045380830765,
|
| 34011 |
+
"learning_rate": 0.00097673716109903,
|
| 34012 |
+
"loss": 1.5348,
|
| 34013 |
+
"step": 9672
|
| 34014 |
+
},
|
| 34015 |
+
{
|
| 34016 |
+
"epoch": 0.4301276065981948,
|
| 34017 |
+
"grad_norm": 0.06930448859930038,
|
| 34018 |
+
"learning_rate": 0.0009767265138070846,
|
| 34019 |
+
"loss": 1.5296,
|
| 34020 |
+
"step": 9674
|
| 34021 |
+
},
|
| 34022 |
+
{
|
| 34023 |
+
"epoch": 0.4302165310568672,
|
| 34024 |
+
"grad_norm": 0.07054897397756577,
|
| 34025 |
+
"learning_rate": 0.0009767158641371483,
|
| 34026 |
+
"loss": 1.5402,
|
| 34027 |
+
"step": 9676
|
| 34028 |
+
},
|
| 34029 |
+
{
|
| 34030 |
+
"epoch": 0.43030545551553956,
|
| 34031 |
+
"grad_norm": 0.07136155664920807,
|
| 34032 |
+
"learning_rate": 0.0009767052120892741,
|
| 34033 |
+
"loss": 1.5364,
|
| 34034 |
+
"step": 9678
|
| 34035 |
+
},
|
| 34036 |
+
{
|
| 34037 |
+
"epoch": 0.4303943799742119,
|
| 34038 |
+
"grad_norm": 0.06936744600534439,
|
| 34039 |
+
"learning_rate": 0.0009766945576635151,
|
| 34040 |
+
"loss": 1.5287,
|
| 34041 |
+
"step": 9680
|
| 34042 |
+
},
|
| 34043 |
+
{
|
| 34044 |
+
"epoch": 0.43048330443288424,
|
| 34045 |
+
"grad_norm": 0.07328307628631592,
|
| 34046 |
+
"learning_rate": 0.0009766839008599245,
|
| 34047 |
+
"loss": 1.5266,
|
| 34048 |
+
"step": 9682
|
| 34049 |
+
},
|
| 34050 |
+
{
|
| 34051 |
+
"epoch": 0.4305722288915566,
|
| 34052 |
+
"grad_norm": 0.06779733300209045,
|
| 34053 |
+
"learning_rate": 0.0009766732416785556,
|
| 34054 |
+
"loss": 1.5374,
|
| 34055 |
+
"step": 9684
|
| 34056 |
+
},
|
| 34057 |
+
{
|
| 34058 |
+
"epoch": 0.430661153350229,
|
| 34059 |
+
"grad_norm": 0.07060229033231735,
|
| 34060 |
+
"learning_rate": 0.0009766625801194613,
|
| 34061 |
+
"loss": 1.5293,
|
| 34062 |
+
"step": 9686
|
| 34063 |
+
},
|
| 34064 |
+
{
|
| 34065 |
+
"epoch": 0.43075007780890134,
|
| 34066 |
+
"grad_norm": 0.06920021772384644,
|
| 34067 |
+
"learning_rate": 0.000976651916182695,
|
| 34068 |
+
"loss": 1.5357,
|
| 34069 |
+
"step": 9688
|
| 34070 |
+
},
|
| 34071 |
+
{
|
| 34072 |
+
"epoch": 0.4308390022675737,
|
| 34073 |
+
"grad_norm": 0.0714344009757042,
|
| 34074 |
+
"learning_rate": 0.0009766412498683097,
|
| 34075 |
+
"loss": 1.536,
|
| 34076 |
+
"step": 9690
|
| 34077 |
+
},
|
| 34078 |
+
{
|
| 34079 |
+
"epoch": 0.4309279267262461,
|
| 34080 |
+
"grad_norm": 0.06918825954198837,
|
| 34081 |
+
"learning_rate": 0.000976630581176359,
|
| 34082 |
+
"loss": 1.5312,
|
| 34083 |
+
"step": 9692
|
| 34084 |
+
},
|
| 34085 |
+
{
|
| 34086 |
+
"epoch": 0.4310168511849184,
|
| 34087 |
+
"grad_norm": 0.07354065775871277,
|
| 34088 |
+
"learning_rate": 0.0009766199101068956,
|
| 34089 |
+
"loss": 1.5333,
|
| 34090 |
+
"step": 9694
|
| 34091 |
+
},
|
| 34092 |
+
{
|
| 34093 |
+
"epoch": 0.43110577564359076,
|
| 34094 |
+
"grad_norm": 0.07036007940769196,
|
| 34095 |
+
"learning_rate": 0.0009766092366599731,
|
| 34096 |
+
"loss": 1.5393,
|
| 34097 |
+
"step": 9696
|
| 34098 |
+
},
|
| 34099 |
+
{
|
| 34100 |
+
"epoch": 0.43119470010226313,
|
| 34101 |
+
"grad_norm": 0.07394345849752426,
|
| 34102 |
+
"learning_rate": 0.0009765985608356446,
|
| 34103 |
+
"loss": 1.5397,
|
| 34104 |
+
"step": 9698
|
| 34105 |
+
},
|
| 34106 |
+
{
|
| 34107 |
+
"epoch": 0.4312836245609355,
|
| 34108 |
+
"grad_norm": 0.07076235115528107,
|
| 34109 |
+
"learning_rate": 0.0009765878826339634,
|
| 34110 |
+
"loss": 1.5343,
|
| 34111 |
+
"step": 9700
|
| 34112 |
+
},
|
| 34113 |
+
{
|
| 34114 |
+
"epoch": 0.43137254901960786,
|
| 34115 |
+
"grad_norm": 0.07146693021059036,
|
| 34116 |
+
"learning_rate": 0.0009765772020549827,
|
| 34117 |
+
"loss": 1.5359,
|
| 34118 |
+
"step": 9702
|
| 34119 |
+
},
|
| 34120 |
+
{
|
| 34121 |
+
"epoch": 0.4314614734782802,
|
| 34122 |
+
"grad_norm": 0.06926552951335907,
|
| 34123 |
+
"learning_rate": 0.0009765665190987558,
|
| 34124 |
+
"loss": 1.5294,
|
| 34125 |
+
"step": 9704
|
| 34126 |
+
},
|
| 34127 |
+
{
|
| 34128 |
+
"epoch": 0.43155039793695255,
|
| 34129 |
+
"grad_norm": 0.07245678454637527,
|
| 34130 |
+
"learning_rate": 0.000976555833765336,
|
| 34131 |
+
"loss": 1.5303,
|
| 34132 |
+
"step": 9706
|
| 34133 |
+
},
|
| 34134 |
+
{
|
| 34135 |
+
"epoch": 0.4316393223956249,
|
| 34136 |
+
"grad_norm": 0.06824632734060287,
|
| 34137 |
+
"learning_rate": 0.0009765451460547766,
|
| 34138 |
+
"loss": 1.5339,
|
| 34139 |
+
"step": 9708
|
| 34140 |
+
},
|
| 34141 |
+
{
|
| 34142 |
+
"epoch": 0.4317282468542973,
|
| 34143 |
+
"grad_norm": 0.06656771898269653,
|
| 34144 |
+
"learning_rate": 0.0009765344559671307,
|
| 34145 |
+
"loss": 1.5326,
|
| 34146 |
+
"step": 9710
|
| 34147 |
+
},
|
| 34148 |
+
{
|
| 34149 |
+
"epoch": 0.43181717131296965,
|
| 34150 |
+
"grad_norm": 0.0666504055261612,
|
| 34151 |
+
"learning_rate": 0.0009765237635024522,
|
| 34152 |
+
"loss": 1.5307,
|
| 34153 |
+
"step": 9712
|
| 34154 |
+
},
|
| 34155 |
+
{
|
| 34156 |
+
"epoch": 0.43190609577164196,
|
| 34157 |
+
"grad_norm": 0.06956163048744202,
|
| 34158 |
+
"learning_rate": 0.0009765130686607938,
|
| 34159 |
+
"loss": 1.5284,
|
| 34160 |
+
"step": 9714
|
| 34161 |
+
},
|
| 34162 |
+
{
|
| 34163 |
+
"epoch": 0.43199502023031433,
|
| 34164 |
+
"grad_norm": 0.06977500021457672,
|
| 34165 |
+
"learning_rate": 0.0009765023714422092,
|
| 34166 |
+
"loss": 1.5358,
|
| 34167 |
+
"step": 9716
|
| 34168 |
+
},
|
| 34169 |
+
{
|
| 34170 |
+
"epoch": 0.4320839446889867,
|
| 34171 |
+
"grad_norm": 0.06824065744876862,
|
| 34172 |
+
"learning_rate": 0.0009764916718467517,
|
| 34173 |
+
"loss": 1.5359,
|
| 34174 |
+
"step": 9718
|
| 34175 |
+
},
|
| 34176 |
+
{
|
| 34177 |
+
"epoch": 0.43217286914765907,
|
| 34178 |
+
"grad_norm": 0.06885942071676254,
|
| 34179 |
+
"learning_rate": 0.0009764809698744746,
|
| 34180 |
+
"loss": 1.5375,
|
| 34181 |
+
"step": 9720
|
| 34182 |
+
},
|
| 34183 |
+
{
|
| 34184 |
+
"epoch": 0.43226179360633143,
|
| 34185 |
+
"grad_norm": 0.06814990937709808,
|
| 34186 |
+
"learning_rate": 0.0009764702655254314,
|
| 34187 |
+
"loss": 1.5334,
|
| 34188 |
+
"step": 9722
|
| 34189 |
+
},
|
| 34190 |
+
{
|
| 34191 |
+
"epoch": 0.4323507180650038,
|
| 34192 |
+
"grad_norm": 0.07028649002313614,
|
| 34193 |
+
"learning_rate": 0.0009764595587996754,
|
| 34194 |
+
"loss": 1.5322,
|
| 34195 |
+
"step": 9724
|
| 34196 |
+
},
|
| 34197 |
+
{
|
| 34198 |
+
"epoch": 0.4324396425236761,
|
| 34199 |
+
"grad_norm": 0.06551536917686462,
|
| 34200 |
+
"learning_rate": 0.00097644884969726,
|
| 34201 |
+
"loss": 1.5346,
|
| 34202 |
+
"step": 9726
|
| 34203 |
+
},
|
| 34204 |
+
{
|
| 34205 |
+
"epoch": 0.4325285669823485,
|
| 34206 |
+
"grad_norm": 0.06867454200983047,
|
| 34207 |
+
"learning_rate": 0.0009764381382182387,
|
| 34208 |
+
"loss": 1.5386,
|
| 34209 |
+
"step": 9728
|
| 34210 |
+
},
|
| 34211 |
+
{
|
| 34212 |
+
"epoch": 0.43261749144102085,
|
| 34213 |
+
"grad_norm": 0.065100759267807,
|
| 34214 |
+
"learning_rate": 0.0009764274243626649,
|
| 34215 |
+
"loss": 1.5314,
|
| 34216 |
+
"step": 9730
|
| 34217 |
+
},
|
| 34218 |
+
{
|
| 34219 |
+
"epoch": 0.4327064158996932,
|
| 34220 |
+
"grad_norm": 0.0707404837012291,
|
| 34221 |
+
"learning_rate": 0.000976416708130592,
|
| 34222 |
+
"loss": 1.5359,
|
| 34223 |
+
"step": 9732
|
| 34224 |
+
},
|
| 34225 |
+
{
|
| 34226 |
+
"epoch": 0.4327953403583656,
|
| 34227 |
+
"grad_norm": 0.0689748004078865,
|
| 34228 |
+
"learning_rate": 0.0009764059895220734,
|
| 34229 |
+
"loss": 1.5353,
|
| 34230 |
+
"step": 9734
|
| 34231 |
+
},
|
| 34232 |
+
{
|
| 34233 |
+
"epoch": 0.4328842648170379,
|
| 34234 |
+
"grad_norm": 0.06782509386539459,
|
| 34235 |
+
"learning_rate": 0.0009763952685371627,
|
| 34236 |
+
"loss": 1.5366,
|
| 34237 |
+
"step": 9736
|
| 34238 |
+
},
|
| 34239 |
+
{
|
| 34240 |
+
"epoch": 0.43297318927571027,
|
| 34241 |
+
"grad_norm": 0.07140477746725082,
|
| 34242 |
+
"learning_rate": 0.0009763845451759133,
|
| 34243 |
+
"loss": 1.5278,
|
| 34244 |
+
"step": 9738
|
| 34245 |
+
},
|
| 34246 |
+
{
|
| 34247 |
+
"epoch": 0.43306211373438264,
|
| 34248 |
+
"grad_norm": 0.06992009282112122,
|
| 34249 |
+
"learning_rate": 0.0009763738194383787,
|
| 34250 |
+
"loss": 1.533,
|
| 34251 |
+
"step": 9740
|
| 34252 |
+
},
|
| 34253 |
+
{
|
| 34254 |
+
"epoch": 0.433151038193055,
|
| 34255 |
+
"grad_norm": 0.07231522351503372,
|
| 34256 |
+
"learning_rate": 0.0009763630913246124,
|
| 34257 |
+
"loss": 1.5334,
|
| 34258 |
+
"step": 9742
|
| 34259 |
+
},
|
| 34260 |
+
{
|
| 34261 |
+
"epoch": 0.4332399626517274,
|
| 34262 |
+
"grad_norm": 0.07116207480430603,
|
| 34263 |
+
"learning_rate": 0.000976352360834668,
|
| 34264 |
+
"loss": 1.5301,
|
| 34265 |
+
"step": 9744
|
| 34266 |
+
},
|
| 34267 |
+
{
|
| 34268 |
+
"epoch": 0.43332888711039974,
|
| 34269 |
+
"grad_norm": 0.06921789795160294,
|
| 34270 |
+
"learning_rate": 0.000976341627968599,
|
| 34271 |
+
"loss": 1.5333,
|
| 34272 |
+
"step": 9746
|
| 34273 |
+
},
|
| 34274 |
+
{
|
| 34275 |
+
"epoch": 0.43341781156907205,
|
| 34276 |
+
"grad_norm": 0.06795500218868256,
|
| 34277 |
+
"learning_rate": 0.0009763308927264588,
|
| 34278 |
+
"loss": 1.5253,
|
| 34279 |
+
"step": 9748
|
| 34280 |
+
},
|
| 34281 |
+
{
|
| 34282 |
+
"epoch": 0.4335067360277444,
|
| 34283 |
+
"grad_norm": 0.06932450830936432,
|
| 34284 |
+
"learning_rate": 0.0009763201551083011,
|
| 34285 |
+
"loss": 1.5293,
|
| 34286 |
+
"step": 9750
|
| 34287 |
+
},
|
| 34288 |
+
{
|
| 34289 |
+
"epoch": 0.4335956604864168,
|
| 34290 |
+
"grad_norm": 0.07011115550994873,
|
| 34291 |
+
"learning_rate": 0.0009763094151141793,
|
| 34292 |
+
"loss": 1.5288,
|
| 34293 |
+
"step": 9752
|
| 34294 |
+
},
|
| 34295 |
+
{
|
| 34296 |
+
"epoch": 0.43368458494508916,
|
| 34297 |
+
"grad_norm": 0.06874189525842667,
|
| 34298 |
+
"learning_rate": 0.0009762986727441472,
|
| 34299 |
+
"loss": 1.5296,
|
| 34300 |
+
"step": 9754
|
| 34301 |
+
},
|
| 34302 |
+
{
|
| 34303 |
+
"epoch": 0.4337735094037615,
|
| 34304 |
+
"grad_norm": 0.06724654138088226,
|
| 34305 |
+
"learning_rate": 0.0009762879279982582,
|
| 34306 |
+
"loss": 1.5354,
|
| 34307 |
+
"step": 9756
|
| 34308 |
+
},
|
| 34309 |
+
{
|
| 34310 |
+
"epoch": 0.43386243386243384,
|
| 34311 |
+
"grad_norm": 0.06827713549137115,
|
| 34312 |
+
"learning_rate": 0.0009762771808765659,
|
| 34313 |
+
"loss": 1.5306,
|
| 34314 |
+
"step": 9758
|
| 34315 |
+
},
|
| 34316 |
+
{
|
| 34317 |
+
"epoch": 0.4339513583211062,
|
| 34318 |
+
"grad_norm": 0.06977761536836624,
|
| 34319 |
+
"learning_rate": 0.0009762664313791241,
|
| 34320 |
+
"loss": 1.5323,
|
| 34321 |
+
"step": 9760
|
| 34322 |
+
},
|
| 34323 |
+
{
|
| 34324 |
+
"epoch": 0.4340402827797786,
|
| 34325 |
+
"grad_norm": 0.0717058926820755,
|
| 34326 |
+
"learning_rate": 0.0009762556795059862,
|
| 34327 |
+
"loss": 1.5324,
|
| 34328 |
+
"step": 9762
|
| 34329 |
+
},
|
| 34330 |
+
{
|
| 34331 |
+
"epoch": 0.43412920723845094,
|
| 34332 |
+
"grad_norm": 0.06921355426311493,
|
| 34333 |
+
"learning_rate": 0.0009762449252572058,
|
| 34334 |
+
"loss": 1.5318,
|
| 34335 |
+
"step": 9764
|
| 34336 |
+
},
|
| 34337 |
+
{
|
| 34338 |
+
"epoch": 0.4342181316971233,
|
| 34339 |
+
"grad_norm": 0.0721471756696701,
|
| 34340 |
+
"learning_rate": 0.0009762341686328368,
|
| 34341 |
+
"loss": 1.5354,
|
| 34342 |
+
"step": 9766
|
| 34343 |
+
},
|
| 34344 |
+
{
|
| 34345 |
+
"epoch": 0.4343070561557957,
|
| 34346 |
+
"grad_norm": 0.06916612386703491,
|
| 34347 |
+
"learning_rate": 0.0009762234096329327,
|
| 34348 |
+
"loss": 1.531,
|
| 34349 |
+
"step": 9768
|
| 34350 |
+
},
|
| 34351 |
+
{
|
| 34352 |
+
"epoch": 0.434395980614468,
|
| 34353 |
+
"grad_norm": 0.07326947152614594,
|
| 34354 |
+
"learning_rate": 0.0009762126482575473,
|
| 34355 |
+
"loss": 1.5348,
|
| 34356 |
+
"step": 9770
|
| 34357 |
+
},
|
| 34358 |
+
{
|
| 34359 |
+
"epoch": 0.43448490507314036,
|
| 34360 |
+
"grad_norm": 0.07012344151735306,
|
| 34361 |
+
"learning_rate": 0.000976201884506734,
|
| 34362 |
+
"loss": 1.5335,
|
| 34363 |
+
"step": 9772
|
| 34364 |
+
},
|
| 34365 |
+
{
|
| 34366 |
+
"epoch": 0.4345738295318127,
|
| 34367 |
+
"grad_norm": 0.07134021073579788,
|
| 34368 |
+
"learning_rate": 0.0009761911183805466,
|
| 34369 |
+
"loss": 1.534,
|
| 34370 |
+
"step": 9774
|
| 34371 |
+
},
|
| 34372 |
+
{
|
| 34373 |
+
"epoch": 0.4346627539904851,
|
| 34374 |
+
"grad_norm": 0.06965865939855576,
|
| 34375 |
+
"learning_rate": 0.0009761803498790389,
|
| 34376 |
+
"loss": 1.5304,
|
| 34377 |
+
"step": 9776
|
| 34378 |
+
},
|
| 34379 |
+
{
|
| 34380 |
+
"epoch": 0.43475167844915746,
|
| 34381 |
+
"grad_norm": 0.06988412886857986,
|
| 34382 |
+
"learning_rate": 0.0009761695790022647,
|
| 34383 |
+
"loss": 1.5289,
|
| 34384 |
+
"step": 9778
|
| 34385 |
+
},
|
| 34386 |
+
{
|
| 34387 |
+
"epoch": 0.4348406029078298,
|
| 34388 |
+
"grad_norm": 0.0701071098446846,
|
| 34389 |
+
"learning_rate": 0.0009761588057502775,
|
| 34390 |
+
"loss": 1.5378,
|
| 34391 |
+
"step": 9780
|
| 34392 |
+
},
|
| 34393 |
+
{
|
| 34394 |
+
"epoch": 0.43492952736650214,
|
| 34395 |
+
"grad_norm": 0.07131971418857574,
|
| 34396 |
+
"learning_rate": 0.0009761480301231311,
|
| 34397 |
+
"loss": 1.5371,
|
| 34398 |
+
"step": 9782
|
| 34399 |
+
},
|
| 34400 |
+
{
|
| 34401 |
+
"epoch": 0.4350184518251745,
|
| 34402 |
+
"grad_norm": 0.06863526254892349,
|
| 34403 |
+
"learning_rate": 0.0009761372521208795,
|
| 34404 |
+
"loss": 1.5313,
|
| 34405 |
+
"step": 9784
|
| 34406 |
+
},
|
| 34407 |
+
{
|
| 34408 |
+
"epoch": 0.4351073762838469,
|
| 34409 |
+
"grad_norm": 0.06877754628658295,
|
| 34410 |
+
"learning_rate": 0.0009761264717435761,
|
| 34411 |
+
"loss": 1.5346,
|
| 34412 |
+
"step": 9786
|
| 34413 |
+
},
|
| 34414 |
+
{
|
| 34415 |
+
"epoch": 0.43519630074251925,
|
| 34416 |
+
"grad_norm": 0.06825356185436249,
|
| 34417 |
+
"learning_rate": 0.0009761156889912748,
|
| 34418 |
+
"loss": 1.5348,
|
| 34419 |
+
"step": 9788
|
| 34420 |
+
},
|
| 34421 |
+
{
|
| 34422 |
+
"epoch": 0.43528522520119156,
|
| 34423 |
+
"grad_norm": 0.06907788664102554,
|
| 34424 |
+
"learning_rate": 0.0009761049038640295,
|
| 34425 |
+
"loss": 1.5285,
|
| 34426 |
+
"step": 9790
|
| 34427 |
+
},
|
| 34428 |
+
{
|
| 34429 |
+
"epoch": 0.43537414965986393,
|
| 34430 |
+
"grad_norm": 0.06589856743812561,
|
| 34431 |
+
"learning_rate": 0.0009760941163618937,
|
| 34432 |
+
"loss": 1.5279,
|
| 34433 |
+
"step": 9792
|
| 34434 |
+
},
|
| 34435 |
+
{
|
| 34436 |
+
"epoch": 0.4354630741185363,
|
| 34437 |
+
"grad_norm": 0.06974221765995026,
|
| 34438 |
+
"learning_rate": 0.0009760833264849215,
|
| 34439 |
+
"loss": 1.5368,
|
| 34440 |
+
"step": 9794
|
| 34441 |
+
},
|
| 34442 |
+
{
|
| 34443 |
+
"epoch": 0.43555199857720867,
|
| 34444 |
+
"grad_norm": 0.06818997859954834,
|
| 34445 |
+
"learning_rate": 0.0009760725342331668,
|
| 34446 |
+
"loss": 1.5287,
|
| 34447 |
+
"step": 9796
|
| 34448 |
+
},
|
| 34449 |
+
{
|
| 34450 |
+
"epoch": 0.43564092303588103,
|
| 34451 |
+
"grad_norm": 0.0677684098482132,
|
| 34452 |
+
"learning_rate": 0.0009760617396066834,
|
| 34453 |
+
"loss": 1.5346,
|
| 34454 |
+
"step": 9798
|
| 34455 |
+
},
|
| 34456 |
+
{
|
| 34457 |
+
"epoch": 0.4357298474945534,
|
| 34458 |
+
"grad_norm": 0.06733290106058121,
|
| 34459 |
+
"learning_rate": 0.0009760509426055247,
|
| 34460 |
+
"loss": 1.5316,
|
| 34461 |
+
"step": 9800
|
| 34462 |
+
},
|
| 34463 |
+
{
|
| 34464 |
+
"epoch": 0.4358187719532257,
|
| 34465 |
+
"grad_norm": 0.06740870326757431,
|
| 34466 |
+
"learning_rate": 0.000976040143229745,
|
| 34467 |
+
"loss": 1.5319,
|
| 34468 |
+
"step": 9802
|
| 34469 |
+
},
|
| 34470 |
+
{
|
| 34471 |
+
"epoch": 0.4359076964118981,
|
| 34472 |
+
"grad_norm": 0.06795347481966019,
|
| 34473 |
+
"learning_rate": 0.0009760293414793979,
|
| 34474 |
+
"loss": 1.5353,
|
| 34475 |
+
"step": 9804
|
| 34476 |
+
},
|
| 34477 |
+
{
|
| 34478 |
+
"epoch": 0.43599662087057045,
|
| 34479 |
+
"grad_norm": 0.0665903314948082,
|
| 34480 |
+
"learning_rate": 0.0009760185373545376,
|
| 34481 |
+
"loss": 1.5337,
|
| 34482 |
+
"step": 9806
|
| 34483 |
+
},
|
| 34484 |
+
{
|
| 34485 |
+
"epoch": 0.4360855453292428,
|
| 34486 |
+
"grad_norm": 0.06611596792936325,
|
| 34487 |
+
"learning_rate": 0.0009760077308552178,
|
| 34488 |
+
"loss": 1.5304,
|
| 34489 |
+
"step": 9808
|
| 34490 |
+
},
|
| 34491 |
+
{
|
| 34492 |
+
"epoch": 0.4361744697879152,
|
| 34493 |
+
"grad_norm": 0.06896202266216278,
|
| 34494 |
+
"learning_rate": 0.0009759969219814924,
|
| 34495 |
+
"loss": 1.53,
|
| 34496 |
+
"step": 9810
|
| 34497 |
+
},
|
| 34498 |
+
{
|
| 34499 |
+
"epoch": 0.4362633942465875,
|
| 34500 |
+
"grad_norm": 0.06539107859134674,
|
| 34501 |
+
"learning_rate": 0.0009759861107334154,
|
| 34502 |
+
"loss": 1.5292,
|
| 34503 |
+
"step": 9812
|
| 34504 |
+
},
|
| 34505 |
+
{
|
| 34506 |
+
"epoch": 0.43635231870525987,
|
| 34507 |
+
"grad_norm": 0.07421990483999252,
|
| 34508 |
+
"learning_rate": 0.0009759752971110407,
|
| 34509 |
+
"loss": 1.5396,
|
| 34510 |
+
"step": 9814
|
| 34511 |
+
},
|
| 34512 |
+
{
|
| 34513 |
+
"epoch": 0.43644124316393224,
|
| 34514 |
+
"grad_norm": 0.06868709623813629,
|
| 34515 |
+
"learning_rate": 0.000975964481114422,
|
| 34516 |
+
"loss": 1.5387,
|
| 34517 |
+
"step": 9816
|
| 34518 |
+
},
|
| 34519 |
+
{
|
| 34520 |
+
"epoch": 0.4365301676226046,
|
| 34521 |
+
"grad_norm": 0.07227539271116257,
|
| 34522 |
+
"learning_rate": 0.0009759536627436137,
|
| 34523 |
+
"loss": 1.54,
|
| 34524 |
+
"step": 9818
|
| 34525 |
+
},
|
| 34526 |
+
{
|
| 34527 |
+
"epoch": 0.43661909208127697,
|
| 34528 |
+
"grad_norm": 0.06839174777269363,
|
| 34529 |
+
"learning_rate": 0.0009759428419986693,
|
| 34530 |
+
"loss": 1.5323,
|
| 34531 |
+
"step": 9820
|
| 34532 |
+
},
|
| 34533 |
+
{
|
| 34534 |
+
"epoch": 0.43670801653994934,
|
| 34535 |
+
"grad_norm": 0.07100740075111389,
|
| 34536 |
+
"learning_rate": 0.0009759320188796432,
|
| 34537 |
+
"loss": 1.5304,
|
| 34538 |
+
"step": 9822
|
| 34539 |
+
},
|
| 34540 |
+
{
|
| 34541 |
+
"epoch": 0.43679694099862165,
|
| 34542 |
+
"grad_norm": 0.07037629187107086,
|
| 34543 |
+
"learning_rate": 0.000975921193386589,
|
| 34544 |
+
"loss": 1.5327,
|
| 34545 |
+
"step": 9824
|
| 34546 |
+
},
|
| 34547 |
+
{
|
| 34548 |
+
"epoch": 0.436885865457294,
|
| 34549 |
+
"grad_norm": 0.07322507351636887,
|
| 34550 |
+
"learning_rate": 0.000975910365519561,
|
| 34551 |
+
"loss": 1.5359,
|
| 34552 |
+
"step": 9826
|
| 34553 |
+
},
|
| 34554 |
+
{
|
| 34555 |
+
"epoch": 0.4369747899159664,
|
| 34556 |
+
"grad_norm": 0.06714385747909546,
|
| 34557 |
+
"learning_rate": 0.0009758995352786131,
|
| 34558 |
+
"loss": 1.5287,
|
| 34559 |
+
"step": 9828
|
| 34560 |
+
},
|
| 34561 |
+
{
|
| 34562 |
+
"epoch": 0.43706371437463876,
|
| 34563 |
+
"grad_norm": 0.06822887063026428,
|
| 34564 |
+
"learning_rate": 0.0009758887026637994,
|
| 34565 |
+
"loss": 1.5342,
|
| 34566 |
+
"step": 9830
|
| 34567 |
+
},
|
| 34568 |
+
{
|
| 34569 |
+
"epoch": 0.4371526388333111,
|
| 34570 |
+
"grad_norm": 0.0696975588798523,
|
| 34571 |
+
"learning_rate": 0.0009758778676751737,
|
| 34572 |
+
"loss": 1.5314,
|
| 34573 |
+
"step": 9832
|
| 34574 |
+
},
|
| 34575 |
+
{
|
| 34576 |
+
"epoch": 0.43724156329198344,
|
| 34577 |
+
"grad_norm": 0.07337722182273865,
|
| 34578 |
+
"learning_rate": 0.0009758670303127903,
|
| 34579 |
+
"loss": 1.5383,
|
| 34580 |
+
"step": 9834
|
| 34581 |
+
},
|
| 34582 |
+
{
|
| 34583 |
+
"epoch": 0.4373304877506558,
|
| 34584 |
+
"grad_norm": 0.07148087024688721,
|
| 34585 |
+
"learning_rate": 0.0009758561905767032,
|
| 34586 |
+
"loss": 1.5315,
|
| 34587 |
+
"step": 9836
|
| 34588 |
+
},
|
| 34589 |
+
{
|
| 34590 |
+
"epoch": 0.4374194122093282,
|
| 34591 |
+
"grad_norm": 0.06952288746833801,
|
| 34592 |
+
"learning_rate": 0.0009758453484669663,
|
| 34593 |
+
"loss": 1.5327,
|
| 34594 |
+
"step": 9838
|
| 34595 |
+
},
|
| 34596 |
+
{
|
| 34597 |
+
"epoch": 0.43750833666800054,
|
| 34598 |
+
"grad_norm": 0.07030414789915085,
|
| 34599 |
+
"learning_rate": 0.000975834503983634,
|
| 34600 |
+
"loss": 1.5458,
|
| 34601 |
+
"step": 9840
|
| 34602 |
+
},
|
| 34603 |
+
{
|
| 34604 |
+
"epoch": 0.4375972611266729,
|
| 34605 |
+
"grad_norm": 0.06890011578798294,
|
| 34606 |
+
"learning_rate": 0.00097582365712676,
|
| 34607 |
+
"loss": 1.5377,
|
| 34608 |
+
"step": 9842
|
| 34609 |
+
},
|
| 34610 |
+
{
|
| 34611 |
+
"epoch": 0.4376861855853452,
|
| 34612 |
+
"grad_norm": 0.06955260038375854,
|
| 34613 |
+
"learning_rate": 0.0009758128078963988,
|
| 34614 |
+
"loss": 1.5374,
|
| 34615 |
+
"step": 9844
|
| 34616 |
+
},
|
| 34617 |
+
{
|
| 34618 |
+
"epoch": 0.4377751100440176,
|
| 34619 |
+
"grad_norm": 0.0680069625377655,
|
| 34620 |
+
"learning_rate": 0.0009758019562926041,
|
| 34621 |
+
"loss": 1.5341,
|
| 34622 |
+
"step": 9846
|
| 34623 |
+
},
|
| 34624 |
+
{
|
| 34625 |
+
"epoch": 0.43786403450268996,
|
| 34626 |
+
"grad_norm": 0.06949877738952637,
|
| 34627 |
+
"learning_rate": 0.0009757911023154305,
|
| 34628 |
+
"loss": 1.5361,
|
| 34629 |
+
"step": 9848
|
| 34630 |
+
},
|
| 34631 |
+
{
|
| 34632 |
+
"epoch": 0.4379529589613623,
|
| 34633 |
+
"grad_norm": 0.0744439959526062,
|
| 34634 |
+
"learning_rate": 0.0009757802459649319,
|
| 34635 |
+
"loss": 1.5328,
|
| 34636 |
+
"step": 9850
|
| 34637 |
+
},
|
| 34638 |
+
{
|
| 34639 |
+
"epoch": 0.4380418834200347,
|
| 34640 |
+
"grad_norm": 0.07167305797338486,
|
| 34641 |
+
"learning_rate": 0.0009757693872411622,
|
| 34642 |
+
"loss": 1.5318,
|
| 34643 |
+
"step": 9852
|
| 34644 |
+
},
|
| 34645 |
+
{
|
| 34646 |
+
"epoch": 0.43813080787870706,
|
| 34647 |
+
"grad_norm": 0.07135527580976486,
|
| 34648 |
+
"learning_rate": 0.0009757585261441762,
|
| 34649 |
+
"loss": 1.5315,
|
| 34650 |
+
"step": 9854
|
| 34651 |
+
},
|
| 34652 |
+
{
|
| 34653 |
+
"epoch": 0.4382197323373794,
|
| 34654 |
+
"grad_norm": 0.0703761875629425,
|
| 34655 |
+
"learning_rate": 0.0009757476626740274,
|
| 34656 |
+
"loss": 1.5339,
|
| 34657 |
+
"step": 9856
|
| 34658 |
+
},
|
| 34659 |
+
{
|
| 34660 |
+
"epoch": 0.43830865679605174,
|
| 34661 |
+
"grad_norm": 0.06967216730117798,
|
| 34662 |
+
"learning_rate": 0.0009757367968307705,
|
| 34663 |
+
"loss": 1.5276,
|
| 34664 |
+
"step": 9858
|
| 34665 |
+
},
|
| 34666 |
+
{
|
| 34667 |
+
"epoch": 0.4383975812547241,
|
| 34668 |
+
"grad_norm": 0.06713514029979706,
|
| 34669 |
+
"learning_rate": 0.0009757259286144593,
|
| 34670 |
+
"loss": 1.5282,
|
| 34671 |
+
"step": 9860
|
| 34672 |
+
},
|
| 34673 |
+
{
|
| 34674 |
+
"epoch": 0.4384865057133965,
|
| 34675 |
+
"grad_norm": 0.07148092240095139,
|
| 34676 |
+
"learning_rate": 0.0009757150580251481,
|
| 34677 |
+
"loss": 1.5397,
|
| 34678 |
+
"step": 9862
|
| 34679 |
+
},
|
| 34680 |
+
{
|
| 34681 |
+
"epoch": 0.43857543017206885,
|
| 34682 |
+
"grad_norm": 0.07046201080083847,
|
| 34683 |
+
"learning_rate": 0.0009757041850628915,
|
| 34684 |
+
"loss": 1.5359,
|
| 34685 |
+
"step": 9864
|
| 34686 |
+
},
|
| 34687 |
+
{
|
| 34688 |
+
"epoch": 0.43866435463074116,
|
| 34689 |
+
"grad_norm": 0.06848791241645813,
|
| 34690 |
+
"learning_rate": 0.0009756933097277434,
|
| 34691 |
+
"loss": 1.5323,
|
| 34692 |
+
"step": 9866
|
| 34693 |
+
},
|
| 34694 |
+
{
|
| 34695 |
+
"epoch": 0.43875327908941353,
|
| 34696 |
+
"grad_norm": 0.06691039353609085,
|
| 34697 |
+
"learning_rate": 0.000975682432019758,
|
| 34698 |
+
"loss": 1.5337,
|
| 34699 |
+
"step": 9868
|
| 34700 |
+
},
|
| 34701 |
+
{
|
| 34702 |
+
"epoch": 0.4388422035480859,
|
| 34703 |
+
"grad_norm": 0.06878488510847092,
|
| 34704 |
+
"learning_rate": 0.0009756715519389899,
|
| 34705 |
+
"loss": 1.5321,
|
| 34706 |
+
"step": 9870
|
| 34707 |
+
},
|
| 34708 |
+
{
|
| 34709 |
+
"epoch": 0.43893112800675826,
|
| 34710 |
+
"grad_norm": 0.06844276189804077,
|
| 34711 |
+
"learning_rate": 0.0009756606694854928,
|
| 34712 |
+
"loss": 1.5288,
|
| 34713 |
+
"step": 9872
|
| 34714 |
+
},
|
| 34715 |
+
{
|
| 34716 |
+
"epoch": 0.43902005246543063,
|
| 34717 |
+
"grad_norm": 0.07109802216291428,
|
| 34718 |
+
"learning_rate": 0.0009756497846593215,
|
| 34719 |
+
"loss": 1.5309,
|
| 34720 |
+
"step": 9874
|
| 34721 |
+
},
|
| 34722 |
+
{
|
| 34723 |
+
"epoch": 0.439108976924103,
|
| 34724 |
+
"grad_norm": 0.06382738053798676,
|
| 34725 |
+
"learning_rate": 0.0009756388974605302,
|
| 34726 |
+
"loss": 1.5289,
|
| 34727 |
+
"step": 9876
|
| 34728 |
+
},
|
| 34729 |
+
{
|
| 34730 |
+
"epoch": 0.4391979013827753,
|
| 34731 |
+
"grad_norm": 0.06882934272289276,
|
| 34732 |
+
"learning_rate": 0.000975628007889173,
|
| 34733 |
+
"loss": 1.5332,
|
| 34734 |
+
"step": 9878
|
| 34735 |
+
},
|
| 34736 |
+
{
|
| 34737 |
+
"epoch": 0.4392868258414477,
|
| 34738 |
+
"grad_norm": 0.06806602329015732,
|
| 34739 |
+
"learning_rate": 0.0009756171159453045,
|
| 34740 |
+
"loss": 1.5313,
|
| 34741 |
+
"step": 9880
|
| 34742 |
+
},
|
| 34743 |
+
{
|
| 34744 |
+
"epoch": 0.43937575030012005,
|
| 34745 |
+
"grad_norm": 0.06839878112077713,
|
| 34746 |
+
"learning_rate": 0.0009756062216289787,
|
| 34747 |
+
"loss": 1.5379,
|
| 34748 |
+
"step": 9882
|
| 34749 |
+
},
|
| 34750 |
+
{
|
| 34751 |
+
"epoch": 0.4394646747587924,
|
| 34752 |
+
"grad_norm": 0.0689515769481659,
|
| 34753 |
+
"learning_rate": 0.0009755953249402503,
|
| 34754 |
+
"loss": 1.5344,
|
| 34755 |
+
"step": 9884
|
| 34756 |
+
},
|
| 34757 |
+
{
|
| 34758 |
+
"epoch": 0.4395535992174648,
|
| 34759 |
+
"grad_norm": 0.06822851300239563,
|
| 34760 |
+
"learning_rate": 0.0009755844258791733,
|
| 34761 |
+
"loss": 1.5353,
|
| 34762 |
+
"step": 9886
|
| 34763 |
+
},
|
| 34764 |
+
{
|
| 34765 |
+
"epoch": 0.4396425236761371,
|
| 34766 |
+
"grad_norm": 0.06764476001262665,
|
| 34767 |
+
"learning_rate": 0.0009755735244458024,
|
| 34768 |
+
"loss": 1.5303,
|
| 34769 |
+
"step": 9888
|
| 34770 |
+
},
|
| 34771 |
+
{
|
| 34772 |
+
"epoch": 0.43973144813480947,
|
| 34773 |
+
"grad_norm": 0.06747211515903473,
|
| 34774 |
+
"learning_rate": 0.0009755626206401917,
|
| 34775 |
+
"loss": 1.5273,
|
| 34776 |
+
"step": 9890
|
| 34777 |
+
},
|
| 34778 |
+
{
|
| 34779 |
+
"epoch": 0.43982037259348183,
|
| 34780 |
+
"grad_norm": 0.06862806528806686,
|
| 34781 |
+
"learning_rate": 0.0009755517144623958,
|
| 34782 |
+
"loss": 1.5317,
|
| 34783 |
+
"step": 9892
|
| 34784 |
+
},
|
| 34785 |
+
{
|
| 34786 |
+
"epoch": 0.4399092970521542,
|
| 34787 |
+
"grad_norm": 0.06655488908290863,
|
| 34788 |
+
"learning_rate": 0.0009755408059124688,
|
| 34789 |
+
"loss": 1.5291,
|
| 34790 |
+
"step": 9894
|
| 34791 |
+
},
|
| 34792 |
+
{
|
| 34793 |
+
"epoch": 0.43999822151082657,
|
| 34794 |
+
"grad_norm": 0.06658562272787094,
|
| 34795 |
+
"learning_rate": 0.0009755298949904655,
|
| 34796 |
+
"loss": 1.5282,
|
| 34797 |
+
"step": 9896
|
| 34798 |
+
},
|
| 34799 |
+
{
|
| 34800 |
+
"epoch": 0.4400871459694989,
|
| 34801 |
+
"grad_norm": 0.06817793101072311,
|
| 34802 |
+
"learning_rate": 0.0009755189816964402,
|
| 34803 |
+
"loss": 1.5301,
|
| 34804 |
+
"step": 9898
|
| 34805 |
+
},
|
| 34806 |
+
{
|
| 34807 |
+
"epoch": 0.44017607042817125,
|
| 34808 |
+
"grad_norm": 0.07138610631227493,
|
| 34809 |
+
"learning_rate": 0.0009755080660304472,
|
| 34810 |
+
"loss": 1.5329,
|
| 34811 |
+
"step": 9900
|
| 34812 |
+
},
|
| 34813 |
+
{
|
| 34814 |
+
"epoch": 0.4402649948868436,
|
| 34815 |
+
"grad_norm": 0.06737557798624039,
|
| 34816 |
+
"learning_rate": 0.0009754971479925409,
|
| 34817 |
+
"loss": 1.5357,
|
| 34818 |
+
"step": 9902
|
| 34819 |
+
},
|
| 34820 |
+
{
|
| 34821 |
+
"epoch": 0.440353919345516,
|
| 34822 |
+
"grad_norm": 0.06973376125097275,
|
| 34823 |
+
"learning_rate": 0.0009754862275827762,
|
| 34824 |
+
"loss": 1.5267,
|
| 34825 |
+
"step": 9904
|
| 34826 |
+
},
|
| 34827 |
+
{
|
| 34828 |
+
"epoch": 0.44044284380418836,
|
| 34829 |
+
"grad_norm": 0.06866409629583359,
|
| 34830 |
+
"learning_rate": 0.000975475304801207,
|
| 34831 |
+
"loss": 1.5338,
|
| 34832 |
+
"step": 9906
|
| 34833 |
+
},
|
| 34834 |
+
{
|
| 34835 |
+
"epoch": 0.4405317682628607,
|
| 34836 |
+
"grad_norm": 0.06903880089521408,
|
| 34837 |
+
"learning_rate": 0.0009754643796478882,
|
| 34838 |
+
"loss": 1.5376,
|
| 34839 |
+
"step": 9908
|
| 34840 |
+
},
|
| 34841 |
+
{
|
| 34842 |
+
"epoch": 0.44062069272153304,
|
| 34843 |
+
"grad_norm": 0.06750325113534927,
|
| 34844 |
+
"learning_rate": 0.0009754534521228742,
|
| 34845 |
+
"loss": 1.5331,
|
| 34846 |
+
"step": 9910
|
| 34847 |
+
},
|
| 34848 |
+
{
|
| 34849 |
+
"epoch": 0.4407096171802054,
|
| 34850 |
+
"grad_norm": 0.0681682899594307,
|
| 34851 |
+
"learning_rate": 0.0009754425222262193,
|
| 34852 |
+
"loss": 1.535,
|
| 34853 |
+
"step": 9912
|
| 34854 |
+
},
|
| 34855 |
+
{
|
| 34856 |
+
"epoch": 0.44079854163887777,
|
| 34857 |
+
"grad_norm": 0.06802817434072495,
|
| 34858 |
+
"learning_rate": 0.0009754315899579783,
|
| 34859 |
+
"loss": 1.53,
|
| 34860 |
+
"step": 9914
|
| 34861 |
+
},
|
| 34862 |
+
{
|
| 34863 |
+
"epoch": 0.44088746609755014,
|
| 34864 |
+
"grad_norm": 0.06679469347000122,
|
| 34865 |
+
"learning_rate": 0.0009754206553182057,
|
| 34866 |
+
"loss": 1.5295,
|
| 34867 |
+
"step": 9916
|
| 34868 |
+
},
|
| 34869 |
+
{
|
| 34870 |
+
"epoch": 0.4409763905562225,
|
| 34871 |
+
"grad_norm": 0.06789480149745941,
|
| 34872 |
+
"learning_rate": 0.0009754097183069557,
|
| 34873 |
+
"loss": 1.5327,
|
| 34874 |
+
"step": 9918
|
| 34875 |
+
},
|
| 34876 |
+
{
|
| 34877 |
+
"epoch": 0.4410653150148948,
|
| 34878 |
+
"grad_norm": 0.07148660719394684,
|
| 34879 |
+
"learning_rate": 0.0009753987789242833,
|
| 34880 |
+
"loss": 1.5376,
|
| 34881 |
+
"step": 9920
|
| 34882 |
+
},
|
| 34883 |
+
{
|
| 34884 |
+
"epoch": 0.4411542394735672,
|
| 34885 |
+
"grad_norm": 0.07011796534061432,
|
| 34886 |
+
"learning_rate": 0.0009753878371702427,
|
| 34887 |
+
"loss": 1.5291,
|
| 34888 |
+
"step": 9922
|
| 34889 |
+
},
|
| 34890 |
+
{
|
| 34891 |
+
"epoch": 0.44124316393223956,
|
| 34892 |
+
"grad_norm": 0.07079441100358963,
|
| 34893 |
+
"learning_rate": 0.0009753768930448888,
|
| 34894 |
+
"loss": 1.5317,
|
| 34895 |
+
"step": 9924
|
| 34896 |
+
},
|
| 34897 |
+
{
|
| 34898 |
+
"epoch": 0.4413320883909119,
|
| 34899 |
+
"grad_norm": 0.07116356492042542,
|
| 34900 |
+
"learning_rate": 0.000975365946548276,
|
| 34901 |
+
"loss": 1.5337,
|
| 34902 |
+
"step": 9926
|
| 34903 |
+
},
|
| 34904 |
+
{
|
| 34905 |
+
"epoch": 0.4414210128495843,
|
| 34906 |
+
"grad_norm": 0.07054295390844345,
|
| 34907 |
+
"learning_rate": 0.000975354997680459,
|
| 34908 |
+
"loss": 1.5309,
|
| 34909 |
+
"step": 9928
|
| 34910 |
+
},
|
| 34911 |
+
{
|
| 34912 |
+
"epoch": 0.44150993730825666,
|
| 34913 |
+
"grad_norm": 0.06715717166662216,
|
| 34914 |
+
"learning_rate": 0.0009753440464414924,
|
| 34915 |
+
"loss": 1.5324,
|
| 34916 |
+
"step": 9930
|
| 34917 |
+
},
|
| 34918 |
+
{
|
| 34919 |
+
"epoch": 0.441598861766929,
|
| 34920 |
+
"grad_norm": 0.06757137924432755,
|
| 34921 |
+
"learning_rate": 0.0009753330928314308,
|
| 34922 |
+
"loss": 1.5335,
|
| 34923 |
+
"step": 9932
|
| 34924 |
+
},
|
| 34925 |
+
{
|
| 34926 |
+
"epoch": 0.44168778622560134,
|
| 34927 |
+
"grad_norm": 0.06650960445404053,
|
| 34928 |
+
"learning_rate": 0.0009753221368503286,
|
| 34929 |
+
"loss": 1.5276,
|
| 34930 |
+
"step": 9934
|
| 34931 |
+
},
|
| 34932 |
+
{
|
| 34933 |
+
"epoch": 0.4417767106842737,
|
| 34934 |
+
"grad_norm": 0.06757502257823944,
|
| 34935 |
+
"learning_rate": 0.0009753111784982409,
|
| 34936 |
+
"loss": 1.5326,
|
| 34937 |
+
"step": 9936
|
| 34938 |
+
},
|
| 34939 |
+
{
|
| 34940 |
+
"epoch": 0.4418656351429461,
|
| 34941 |
+
"grad_norm": 0.07040387392044067,
|
| 34942 |
+
"learning_rate": 0.0009753002177752221,
|
| 34943 |
+
"loss": 1.5321,
|
| 34944 |
+
"step": 9938
|
| 34945 |
+
},
|
| 34946 |
+
{
|
| 34947 |
+
"epoch": 0.44195455960161845,
|
| 34948 |
+
"grad_norm": 0.06907001882791519,
|
| 34949 |
+
"learning_rate": 0.000975289254681327,
|
| 34950 |
+
"loss": 1.5358,
|
| 34951 |
+
"step": 9940
|
| 34952 |
+
},
|
| 34953 |
+
{
|
| 34954 |
+
"epoch": 0.44204348406029076,
|
| 34955 |
+
"grad_norm": 0.06722556799650192,
|
| 34956 |
+
"learning_rate": 0.0009752782892166101,
|
| 34957 |
+
"loss": 1.5388,
|
| 34958 |
+
"step": 9942
|
| 34959 |
+
},
|
| 34960 |
+
{
|
| 34961 |
+
"epoch": 0.4421324085189631,
|
| 34962 |
+
"grad_norm": 0.06660650670528412,
|
| 34963 |
+
"learning_rate": 0.0009752673213811263,
|
| 34964 |
+
"loss": 1.5229,
|
| 34965 |
+
"step": 9944
|
| 34966 |
+
},
|
| 34967 |
+
{
|
| 34968 |
+
"epoch": 0.4422213329776355,
|
| 34969 |
+
"grad_norm": 0.0699448511004448,
|
| 34970 |
+
"learning_rate": 0.00097525635117493,
|
| 34971 |
+
"loss": 1.5271,
|
| 34972 |
+
"step": 9946
|
| 34973 |
+
},
|
| 34974 |
+
{
|
| 34975 |
+
"epoch": 0.44231025743630786,
|
| 34976 |
+
"grad_norm": 0.07192565500736237,
|
| 34977 |
+
"learning_rate": 0.0009752453785980763,
|
| 34978 |
+
"loss": 1.5317,
|
| 34979 |
+
"step": 9948
|
| 34980 |
+
},
|
| 34981 |
+
{
|
| 34982 |
+
"epoch": 0.44239918189498023,
|
| 34983 |
+
"grad_norm": 0.06789202988147736,
|
| 34984 |
+
"learning_rate": 0.0009752344036506197,
|
| 34985 |
+
"loss": 1.5254,
|
| 34986 |
+
"step": 9950
|
| 34987 |
+
},
|
| 34988 |
+
{
|
| 34989 |
+
"epoch": 0.4424881063536526,
|
| 34990 |
+
"grad_norm": 0.06763622164726257,
|
| 34991 |
+
"learning_rate": 0.000975223426332615,
|
| 34992 |
+
"loss": 1.5357,
|
| 34993 |
+
"step": 9952
|
| 34994 |
+
},
|
| 34995 |
+
{
|
| 34996 |
+
"epoch": 0.4425770308123249,
|
| 34997 |
+
"grad_norm": 0.0687379539012909,
|
| 34998 |
+
"learning_rate": 0.000975212446644117,
|
| 34999 |
+
"loss": 1.5316,
|
| 35000 |
+
"step": 9954
|
| 35001 |
+
},
|
| 35002 |
+
{
|
| 35003 |
+
"epoch": 0.4426659552709973,
|
| 35004 |
+
"grad_norm": 0.06635449826717377,
|
| 35005 |
+
"learning_rate": 0.0009752014645851805,
|
| 35006 |
+
"loss": 1.5354,
|
| 35007 |
+
"step": 9956
|
| 35008 |
+
},
|
| 35009 |
+
{
|
| 35010 |
+
"epoch": 0.44275487972966965,
|
| 35011 |
+
"grad_norm": 0.07067475467920303,
|
| 35012 |
+
"learning_rate": 0.0009751904801558602,
|
| 35013 |
+
"loss": 1.5283,
|
| 35014 |
+
"step": 9958
|
| 35015 |
+
},
|
| 35016 |
+
{
|
| 35017 |
+
"epoch": 0.442843804188342,
|
| 35018 |
+
"grad_norm": 0.07415599375963211,
|
| 35019 |
+
"learning_rate": 0.0009751794933562108,
|
| 35020 |
+
"loss": 1.534,
|
| 35021 |
+
"step": 9960
|
| 35022 |
+
},
|
| 35023 |
+
{
|
| 35024 |
+
"epoch": 0.4429327286470144,
|
| 35025 |
+
"grad_norm": 0.06655219942331314,
|
| 35026 |
+
"learning_rate": 0.0009751685041862872,
|
| 35027 |
+
"loss": 1.5341,
|
| 35028 |
+
"step": 9962
|
| 35029 |
+
},
|
| 35030 |
+
{
|
| 35031 |
+
"epoch": 0.4430216531056867,
|
| 35032 |
+
"grad_norm": 0.07229764014482498,
|
| 35033 |
+
"learning_rate": 0.0009751575126461443,
|
| 35034 |
+
"loss": 1.535,
|
| 35035 |
+
"step": 9964
|
| 35036 |
+
},
|
| 35037 |
+
{
|
| 35038 |
+
"epoch": 0.44311057756435906,
|
| 35039 |
+
"grad_norm": 0.06633423268795013,
|
| 35040 |
+
"learning_rate": 0.0009751465187358368,
|
| 35041 |
+
"loss": 1.5361,
|
| 35042 |
+
"step": 9966
|
| 35043 |
+
},
|
| 35044 |
+
{
|
| 35045 |
+
"epoch": 0.44319950202303143,
|
| 35046 |
+
"grad_norm": 0.07215137034654617,
|
| 35047 |
+
"learning_rate": 0.0009751355224554197,
|
| 35048 |
+
"loss": 1.5333,
|
| 35049 |
+
"step": 9968
|
| 35050 |
+
},
|
| 35051 |
+
{
|
| 35052 |
+
"epoch": 0.4432884264817038,
|
| 35053 |
+
"grad_norm": 0.06949516385793686,
|
| 35054 |
+
"learning_rate": 0.0009751245238049477,
|
| 35055 |
+
"loss": 1.53,
|
| 35056 |
+
"step": 9970
|
| 35057 |
+
},
|
| 35058 |
+
{
|
| 35059 |
+
"epoch": 0.44337735094037617,
|
| 35060 |
+
"grad_norm": 0.06816526502370834,
|
| 35061 |
+
"learning_rate": 0.0009751135227844758,
|
| 35062 |
+
"loss": 1.5338,
|
| 35063 |
+
"step": 9972
|
| 35064 |
+
},
|
| 35065 |
+
{
|
| 35066 |
+
"epoch": 0.4434662753990485,
|
| 35067 |
+
"grad_norm": 0.06912938505411148,
|
| 35068 |
+
"learning_rate": 0.0009751025193940586,
|
| 35069 |
+
"loss": 1.5347,
|
| 35070 |
+
"step": 9974
|
| 35071 |
+
},
|
| 35072 |
+
{
|
| 35073 |
+
"epoch": 0.44355519985772085,
|
| 35074 |
+
"grad_norm": 0.07131484895944595,
|
| 35075 |
+
"learning_rate": 0.0009750915136337513,
|
| 35076 |
+
"loss": 1.5317,
|
| 35077 |
+
"step": 9976
|
| 35078 |
+
},
|
| 35079 |
+
{
|
| 35080 |
+
"epoch": 0.4436441243163932,
|
| 35081 |
+
"grad_norm": 0.06642764806747437,
|
| 35082 |
+
"learning_rate": 0.0009750805055036086,
|
| 35083 |
+
"loss": 1.5271,
|
| 35084 |
+
"step": 9978
|
| 35085 |
+
},
|
| 35086 |
+
{
|
| 35087 |
+
"epoch": 0.4437330487750656,
|
| 35088 |
+
"grad_norm": 0.0668521523475647,
|
| 35089 |
+
"learning_rate": 0.0009750694950036855,
|
| 35090 |
+
"loss": 1.5312,
|
| 35091 |
+
"step": 9980
|
| 35092 |
+
},
|
| 35093 |
+
{
|
| 35094 |
+
"epoch": 0.44382197323373795,
|
| 35095 |
+
"grad_norm": 0.06732647120952606,
|
| 35096 |
+
"learning_rate": 0.0009750584821340369,
|
| 35097 |
+
"loss": 1.536,
|
| 35098 |
+
"step": 9982
|
| 35099 |
+
},
|
| 35100 |
+
{
|
| 35101 |
+
"epoch": 0.4439108976924103,
|
| 35102 |
+
"grad_norm": 0.06798292696475983,
|
| 35103 |
+
"learning_rate": 0.0009750474668947178,
|
| 35104 |
+
"loss": 1.5334,
|
| 35105 |
+
"step": 9984
|
| 35106 |
+
},
|
| 35107 |
+
{
|
| 35108 |
+
"epoch": 0.44399982215108263,
|
| 35109 |
+
"grad_norm": 0.06467396020889282,
|
| 35110 |
+
"learning_rate": 0.0009750364492857829,
|
| 35111 |
+
"loss": 1.532,
|
| 35112 |
+
"step": 9986
|
| 35113 |
+
},
|
| 35114 |
+
{
|
| 35115 |
+
"epoch": 0.444088746609755,
|
| 35116 |
+
"grad_norm": 0.06865821778774261,
|
| 35117 |
+
"learning_rate": 0.0009750254293072876,
|
| 35118 |
+
"loss": 1.5302,
|
| 35119 |
+
"step": 9988
|
| 35120 |
+
},
|
| 35121 |
+
{
|
| 35122 |
+
"epoch": 0.44417767106842737,
|
| 35123 |
+
"grad_norm": 0.06773124635219574,
|
| 35124 |
+
"learning_rate": 0.0009750144069592863,
|
| 35125 |
+
"loss": 1.5351,
|
| 35126 |
+
"step": 9990
|
| 35127 |
+
},
|
| 35128 |
+
{
|
| 35129 |
+
"epoch": 0.44426659552709974,
|
| 35130 |
+
"grad_norm": 0.06710581481456757,
|
| 35131 |
+
"learning_rate": 0.0009750033822418345,
|
| 35132 |
+
"loss": 1.5284,
|
| 35133 |
+
"step": 9992
|
| 35134 |
+
},
|
| 35135 |
+
{
|
| 35136 |
+
"epoch": 0.4443555199857721,
|
| 35137 |
+
"grad_norm": 0.06517042219638824,
|
| 35138 |
+
"learning_rate": 0.000974992355154987,
|
| 35139 |
+
"loss": 1.5325,
|
| 35140 |
+
"step": 9994
|
| 35141 |
+
},
|
| 35142 |
+
{
|
| 35143 |
+
"epoch": 0.4444444444444444,
|
| 35144 |
+
"grad_norm": 0.06951684504747391,
|
| 35145 |
+
"learning_rate": 0.0009749813256987987,
|
| 35146 |
+
"loss": 1.5345,
|
| 35147 |
+
"step": 9996
|
| 35148 |
+
},
|
| 35149 |
+
{
|
| 35150 |
+
"epoch": 0.4445333689031168,
|
| 35151 |
+
"grad_norm": 0.06696436554193497,
|
| 35152 |
+
"learning_rate": 0.0009749702938733247,
|
| 35153 |
+
"loss": 1.5336,
|
| 35154 |
+
"step": 9998
|
| 35155 |
+
},
|
| 35156 |
+
{
|
| 35157 |
+
"epoch": 0.44462229336178916,
|
| 35158 |
+
"grad_norm": 0.06807401776313782,
|
| 35159 |
+
"learning_rate": 0.00097495925967862,
|
| 35160 |
+
"loss": 1.5254,
|
| 35161 |
+
"step": 10000
|
| 35162 |
+
},
|
| 35163 |
+
{
|
| 35164 |
+
"epoch": 0.44462229336178916,
|
| 35165 |
+
"eval_loss": 1.5108540058135986,
|
| 35166 |
+
"eval_runtime": 12.3873,
|
| 35167 |
+
"eval_samples_per_second": 557.83,
|
| 35168 |
+
"eval_steps_per_second": 69.749,
|
| 35169 |
+
"step": 10000
|
| 35170 |
}
|
| 35171 |
],
|
| 35172 |
"logging_steps": 2,
|
|
|
|
| 35186 |
"attributes": {}
|
| 35187 |
}
|
| 35188 |
},
|
| 35189 |
+
"total_flos": 2.13973123203072e+19,
|
| 35190 |
"train_batch_size": 768,
|
| 35191 |
"trial_name": null,
|
| 35192 |
"trial_params": null
|