Upload 10 files

Changed files:
- model.safetensors (+1 -1)
- optimizer.pt (+1 -1)
- rng_state.pth (+1 -1)
- scheduler.pt (+1 -1)
- trainer_state.json (+1485 -3)
- training_args.bin (+1 -1)
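These are the checkpoint artifacts the Hugging Face `Trainer` writes on each save: model weights, optimizer and LR-scheduler state, RNG state, the training log, and the serialized training arguments. A minimal sketch of pulling one of them down for inspection, assuming this commit lives in a Hub model repository; the repo id `user/repo` is a placeholder:

```python
from huggingface_hub import hf_hub_download

# "user/repo" is a placeholder; substitute the repository this commit belongs to.
path = hf_hub_download(repo_id="user/repo", filename="trainer_state.json")
print(path)  # local cache path of the downloaded file
```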
model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:8cf6fd855c268d3938b4c6d6a77aa9284b5bd03c679875c89cb3579c489295b8
 size 598635032
optimizer.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:20d57d69e8d4eb0bcf9143bb2a5722964a200d83b3b1c090ed18f98299556b3a
 size 1197359627
rng_state.pth
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:be293c0bc96c40007a1ca95bf99da704f29c24d932c7c8e19b962a361adfdc4c
 size 14645
scheduler.pt
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:af5ee4dc438217ac40b2a125900214146b721f2725ba954be785ed61a3abe011
 size 1465
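All four binaries above are stored through Git LFS, so each diff only touches the three-line pointer file (spec version, `oid sha256:`, byte size); only the oid changes, while the sizes stay identical. A small sketch, assuming the pointer file and the resolved blob are both on local disk, that re-hashes the blob and checks it against the pointer:

```python
import hashlib
import os

def parse_lfs_pointer(path: str) -> dict:
    """Split a Git LFS pointer file into its key/value fields."""
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check a blob's size and sha256 digest against its LFS pointer."""
    ptr = parse_lfs_pointer(pointer_path)
    if os.path.getsize(blob_path) != int(ptr["size"]):
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return "sha256:" + digest.hexdigest() == ptr["oid"]
```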
trainer_state.json
CHANGED

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.…
+  "epoch": 0.0055816341908584,
   "eval_steps": 1000,
-  "global_step": …
+  "global_step": 186000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -13041,6 +13041,1488 @@
       "eval_samples_per_second": 195.51,
       "eval_steps_per_second": 1.534,
       "step": 167000
+    },
+    {
+      "epoch": 0.01981480137754732,
+      "grad_norm": 1.5712428092956543,
+      "learning_rate": 2.8278830871985708e-05,
+      "loss": 1.7747,
+      "step": 167100
+    },
+    {
+      "epoch": 0.020093883087090238,
+      "grad_norm": 1.5386340618133545,
+      "learning_rate": 2.8256880354422098e-05,
+      "loss": 1.7738,
+      "step": 167200
+    },
+    {
+      "epoch": 0.02037296479663316,
+      "grad_norm": 1.5471428632736206,
+      "learning_rate": 2.8234927282417417e-05,
+      "loss": 1.779,
+      "step": 167300
+    },
+    {
+      "epoch": 0.020652046506176077,
+      "grad_norm": 1.5163718461990356,
+      "learning_rate": 2.821297167318992e-05,
+      "loss": 1.7741,
+      "step": 167400
+    },
+    {
+      "epoch": 0.020931128215719,
+      "grad_norm": 1.5554001331329346,
+      "learning_rate": 2.819101354395986e-05,
+      "loss": 1.7825,
+      "step": 167500
+    },
+    {
+      "epoch": 0.02121020992526192,
+      "grad_norm": 1.4839155673980713,
+      "learning_rate": 2.8169052911949484e-05,
+      "loss": 1.7729,
+      "step": 167600
+    },
+    {
+      "epoch": 0.021489291634804838,
+      "grad_norm": 1.5696512460708618,
+      "learning_rate": 2.8147089794382965e-05,
+      "loss": 1.7754,
+      "step": 167700
+    },
+    {
+      "epoch": 0.02176837334434776,
+      "grad_norm": 1.6730250120162964,
+      "learning_rate": 2.8125124208486465e-05,
+      "loss": 1.7736,
+      "step": 167800
+    },
+    {
+      "epoch": 0.022047455053890677,
+      "grad_norm": 1.6201075315475464,
+      "learning_rate": 2.810315617148806e-05,
+      "loss": 1.7771,
+      "step": 167900
+    },
+    {
+      "epoch": 0.0223265367634336,
+      "grad_norm": 1.6662862300872803,
+      "learning_rate": 2.8081185700617746e-05,
+      "loss": 1.7761,
+      "step": 168000
+    },
+    {
+      "epoch": 0.0223265367634336,
+      "eval_loss": 2.161256790161133,
+      "eval_runtime": 52.0964,
+      "eval_samples_per_second": 195.676,
+      "eval_steps_per_second": 1.536,
+      "step": 168000
+    },
+    {
+      "epoch": 0.022605618472976517,
+      "grad_norm": 1.4697953462600708,
+      "learning_rate": 2.8059212813107438e-05,
+      "loss": 1.7894,
+      "step": 168100
+    },
+    {
+      "epoch": 0.022884700182519438,
+      "grad_norm": 1.6400997638702393,
+      "learning_rate": 2.803723752619094e-05,
+      "loss": 1.7779,
+      "step": 168200
+    },
+    {
+      "epoch": 0.02316378189206236,
+      "grad_norm": 1.5220052003860474,
+      "learning_rate": 2.8015259857103942e-05,
+      "loss": 1.7732,
+      "step": 168300
+    },
+    {
+      "epoch": 0.023442863601605277,
+      "grad_norm": 1.542869210243225,
+      "learning_rate": 2.7993279823084007e-05,
+      "loss": 1.7771,
+      "step": 168400
+    },
+    {
+      "epoch": 0.0237219453111482,
+      "grad_norm": 1.4953099489212036,
+      "learning_rate": 2.7971297441370542e-05,
+      "loss": 1.7774,
+      "step": 168500
+    },
+    {
+      "epoch": 0.024001027020691117,
+      "grad_norm": 1.5665849447250366,
+      "learning_rate": 2.7949312729204803e-05,
+      "loss": 1.7633,
+      "step": 168600
+    },
+    {
+      "epoch": 0.024280108730234038,
+      "grad_norm": 1.5860687494277954,
+      "learning_rate": 2.792732570382986e-05,
+      "loss": 1.7798,
+      "step": 168700
+    },
+    {
+      "epoch": 0.02455919043977696,
+      "grad_norm": 1.602845311164856,
+      "learning_rate": 2.790533638249062e-05,
+      "loss": 1.7694,
+      "step": 168800
+    },
+    {
+      "epoch": 0.024838272149319877,
+      "grad_norm": 1.5015400648117065,
+      "learning_rate": 2.7883344782433774e-05,
+      "loss": 1.7628,
+      "step": 168900
+    },
+    {
+      "epoch": 0.0251173538588628,
+      "grad_norm": 1.5296344757080078,
+      "learning_rate": 2.7861350920907807e-05,
+      "loss": 1.7753,
+      "step": 169000
+    },
+    {
+      "epoch": 0.0251173538588628,
+      "eval_loss": 2.1639742851257324,
+      "eval_runtime": 52.1527,
+      "eval_samples_per_second": 195.465,
+      "eval_steps_per_second": 1.534,
+      "step": 169000
+    },
+    {
+      "epoch": 0.025396435568405717,
+      "grad_norm": 1.591369390487671,
+      "learning_rate": 2.783935481516297e-05,
+      "loss": 1.7695,
+      "step": 169100
+    },
+    {
+      "epoch": 0.025675517277948638,
+      "grad_norm": 1.5569419860839844,
+      "learning_rate": 2.7817356482451297e-05,
+      "loss": 1.7689,
+      "step": 169200
+    },
+    {
+      "epoch": 0.025954598987491556,
+      "grad_norm": 1.6080352067947388,
+      "learning_rate": 2.779535594002654e-05,
+      "loss": 1.767,
+      "step": 169300
+    },
+    {
+      "epoch": 0.026233680697034478,
+      "grad_norm": 1.47182035446167,
+      "learning_rate": 2.7773352051442e-05,
+      "loss": 1.7717,
+      "step": 169400
+    },
+    {
+      "epoch": 0.0265127624065774,
+      "grad_norm": 1.6706403493881226,
+      "learning_rate": 2.775134829506148e-05,
+      "loss": 1.7787,
+      "step": 169500
+    },
+    {
+      "epoch": 0.026791844116120317,
+      "grad_norm": 1.6530786752700806,
+      "learning_rate": 2.7729341227037313e-05,
+      "loss": 1.7726,
+      "step": 169600
+    },
+    {
+      "epoch": 0.02707092582566324,
+      "grad_norm": 1.4457296133041382,
+      "learning_rate": 2.7707332018332323e-05,
+      "loss": 1.7697,
+      "step": 169700
+    },
+    {
+      "epoch": 0.027350007535206156,
+      "grad_norm": 1.5824190378189087,
+      "learning_rate": 2.7685320686208793e-05,
+      "loss": 1.7734,
+      "step": 169800
+    },
+    {
+      "epoch": 0.027629089244749078,
+      "grad_norm": 1.6177047491073608,
+      "learning_rate": 2.7663307247930686e-05,
+      "loss": 1.7782,
+      "step": 169900
+    },
+    {
+      "epoch": 0.027908170954292,
+      "grad_norm": 1.505018949508667,
+      "learning_rate": 2.7641291720763612e-05,
+      "loss": 1.7659,
+      "step": 170000
+    },
+    {
+      "epoch": 0.027908170954292,
+      "eval_loss": 2.1508195400238037,
+      "eval_runtime": 52.1147,
+      "eval_samples_per_second": 195.607,
+      "eval_steps_per_second": 1.535,
+      "step": 170000
+    },
+    {
+      "epoch": 0.028187252663834917,
+      "grad_norm": 1.6319383382797241,
+      "learning_rate": 2.7619274121974825e-05,
+      "loss": 1.7709,
+      "step": 170100
+    },
+    {
+      "epoch": 0.02846633437337784,
+      "grad_norm": 1.6314260959625244,
+      "learning_rate": 2.759725446883319e-05,
+      "loss": 1.7675,
+      "step": 170200
+    },
+    {
+      "epoch": 0.028745416082920756,
+      "grad_norm": 1.471872329711914,
+      "learning_rate": 2.7575232778609206e-05,
+      "loss": 1.771,
+      "step": 170300
+    },
+    {
+      "epoch": 0.029024497792463678,
+      "grad_norm": 1.5450881719589233,
+      "learning_rate": 2.755320906857494e-05,
+      "loss": 1.836,
+      "step": 170400
+    },
+    {
+      "epoch": 0.0293035795020066,
+      "grad_norm": 1.5527344942092896,
+      "learning_rate": 2.753118335600408e-05,
+      "loss": 1.8808,
+      "step": 170500
+    },
+    {
+      "epoch": 0.029582661211549517,
+      "grad_norm": 1.8364976644515991,
+      "learning_rate": 2.7509155658171852e-05,
+      "loss": 1.8776,
+      "step": 170600
+    },
+    {
+      "epoch": 0.02986174292109244,
+      "grad_norm": 1.4847674369812012,
+      "learning_rate": 2.7487125992355058e-05,
+      "loss": 1.8724,
+      "step": 170700
+    },
+    {
+      "epoch": 0.030140824630635357,
+      "grad_norm": 1.5595808029174805,
+      "learning_rate": 2.7465094375832028e-05,
+      "loss": 1.8799,
+      "step": 170800
+    },
+    {
+      "epoch": 0.030419906340178278,
+      "grad_norm": 1.54868483543396,
+      "learning_rate": 2.744306082588264e-05,
+      "loss": 1.8704,
+      "step": 170900
+    },
+    {
+      "epoch": 0.030698988049721196,
+      "grad_norm": 1.8504784107208252,
+      "learning_rate": 2.742102535978827e-05,
+      "loss": 1.8736,
+      "step": 171000
+    },
+    {
+      "epoch": 0.030698988049721196,
+      "eval_loss": 2.1541635990142822,
+      "eval_runtime": 52.1803,
+      "eval_samples_per_second": 195.361,
+      "eval_steps_per_second": 1.533,
+      "step": 171000
+    },
+    {
+      "epoch": 0.030978069759264117,
+      "grad_norm": 1.6168150901794434,
+      "learning_rate": 2.7398987994831822e-05,
+      "loss": 1.8737,
+      "step": 171100
+    },
+    {
+      "epoch": 0.03125715146880704,
+      "grad_norm": 1.6291587352752686,
+      "learning_rate": 2.737694874829766e-05,
+      "loss": 1.8691,
+      "step": 171200
+    },
+    {
+      "epoch": 0.03153623317834996,
+      "grad_norm": 1.5887749195098877,
+      "learning_rate": 2.735490763747164e-05,
+      "loss": 1.8725,
+      "step": 171300
+    },
+    {
+      "epoch": 0.031815314887892875,
+      "grad_norm": 1.6395853757858276,
+      "learning_rate": 2.733286467964108e-05,
+      "loss": 1.8857,
+      "step": 171400
+    },
+    {
+      "epoch": 0.0320943965974358,
+      "grad_norm": 1.5826025009155273,
+      "learning_rate": 2.7310819892094742e-05,
+      "loss": 1.8546,
+      "step": 171500
+    },
+    {
+      "epoch": 0.03237347830697872,
+      "grad_norm": 2.160349130630493,
+      "learning_rate": 2.7288773292122827e-05,
+      "loss": 1.8623,
+      "step": 171600
+    },
+    {
+      "epoch": 0.032652560016521635,
+      "grad_norm": 1.6130859851837158,
+      "learning_rate": 2.726672489701696e-05,
+      "loss": 1.8629,
+      "step": 171700
+    },
+    {
+      "epoch": 0.03293164172606456,
+      "grad_norm": 1.619787335395813,
+      "learning_rate": 2.7244674724070163e-05,
+      "loss": 1.8646,
+      "step": 171800
+    },
+    {
+      "epoch": 0.03321072343560748,
+      "grad_norm": 2.099820375442505,
+      "learning_rate": 2.722262279057687e-05,
+      "loss": 1.8679,
+      "step": 171900
+    },
+    {
+      "epoch": 0.033489805145150396,
+      "grad_norm": 1.7083640098571777,
+      "learning_rate": 2.720056911383287e-05,
+      "loss": 1.8554,
+      "step": 172000
+    },
+    {
+      "epoch": 0.033489805145150396,
+      "eval_loss": 2.1523571014404297,
+      "eval_runtime": 52.1491,
+      "eval_samples_per_second": 195.478,
+      "eval_steps_per_second": 1.534,
+      "step": 172000
+    },
+    {
+      "epoch": 0.033768886854693314,
+      "grad_norm": 1.5392628908157349,
+      "learning_rate": 2.717851371113534e-05,
+      "loss": 1.8658,
+      "step": 172100
+    },
+    {
+      "epoch": 0.03404796856423624,
+      "grad_norm": 2.007720708847046,
+      "learning_rate": 2.715645659978281e-05,
+      "loss": 1.861,
+      "step": 172200
+    },
+    {
+      "epoch": 0.03432705027377916,
+      "grad_norm": 1.566613793373108,
+      "learning_rate": 2.7134397797075145e-05,
+      "loss": 1.8669,
+      "step": 172300
+    },
+    {
+      "epoch": 0.034606131983322075,
+      "grad_norm": 1.588408350944519,
+      "learning_rate": 2.7112337320313524e-05,
+      "loss": 1.8568,
+      "step": 172400
+    },
+    {
+      "epoch": 0.034885213692865,
+      "grad_norm": 1.6406699419021606,
+      "learning_rate": 2.7090275186800474e-05,
+      "loss": 1.8713,
+      "step": 172500
+    },
+    {
+      "epoch": 0.03516429540240792,
+      "grad_norm": 1.5397433042526245,
+      "learning_rate": 2.7068211413839782e-05,
+      "loss": 1.8629,
+      "step": 172600
+    },
+    {
+      "epoch": 0.035443377111950836,
+      "grad_norm": 1.5865190029144287,
+      "learning_rate": 2.704614601873654e-05,
+      "loss": 1.8579,
+      "step": 172700
+    },
+    {
+      "epoch": 0.035722458821493754,
+      "grad_norm": 1.7077267169952393,
+      "learning_rate": 2.702407901879712e-05,
+      "loss": 1.8616,
+      "step": 172800
+    },
+    {
+      "epoch": 0.03600154053103668,
+      "grad_norm": 1.727586269378662,
+      "learning_rate": 2.7002010431329134e-05,
+      "loss": 1.8574,
+      "step": 172900
+    },
+    {
+      "epoch": 0.036280622240579596,
+      "grad_norm": 1.5238264799118042,
+      "learning_rate": 2.6979940273641453e-05,
+      "loss": 1.8595,
+      "step": 173000
+    },
+    {
+      "epoch": 0.036280622240579596,
+      "eval_loss": 2.141134738922119,
+      "eval_runtime": 52.1591,
+      "eval_samples_per_second": 195.441,
+      "eval_steps_per_second": 1.534,
+      "step": 173000
+    },
+    {
+      "epoch": 0.036559703950122514,
+      "grad_norm": 1.5688259601593018,
+      "learning_rate": 2.6957868563044176e-05,
+      "loss": 1.8674,
+      "step": 173100
+    },
+    {
+      "epoch": 0.03683878565966544,
+      "grad_norm": 1.5195534229278564,
+      "learning_rate": 2.6935795316848612e-05,
+      "loss": 1.8653,
+      "step": 173200
+    },
+    {
+      "epoch": 0.03711786736920836,
+      "grad_norm": 1.6201164722442627,
+      "learning_rate": 2.691372055236728e-05,
+      "loss": 1.8579,
+      "step": 173300
+    },
+    {
+      "epoch": 0.037396949078751275,
+      "grad_norm": 1.8065686225891113,
+      "learning_rate": 2.6891644286913897e-05,
+      "loss": 1.8755,
+      "step": 173400
+    },
+    {
+      "epoch": 0.0376760307882942,
+      "grad_norm": 1.5661702156066895,
+      "learning_rate": 2.6869566537803347e-05,
+      "loss": 1.8552,
+      "step": 173500
+    },
+    {
+      "epoch": 0.03795511249783712,
+      "grad_norm": 1.6565943956375122,
+      "learning_rate": 2.6847487322351694e-05,
+      "loss": 1.8664,
+      "step": 173600
+    },
+    {
+      "epoch": 0.038234194207380036,
+      "grad_norm": 1.49613356590271,
+      "learning_rate": 2.6825406657876123e-05,
+      "loss": 1.8524,
+      "step": 173700
+    },
+    {
+      "epoch": 0.038513275916922954,
+      "grad_norm": 1.5829864740371704,
+      "learning_rate": 2.6803324561694988e-05,
+      "loss": 1.8732,
+      "step": 173800
+    },
+    {
+      "epoch": 0.03879235762646588,
+      "grad_norm": 1.6095563173294067,
+      "learning_rate": 2.6781241051127738e-05,
+      "loss": 1.8503,
+      "step": 173900
+    },
+    {
+      "epoch": 0.0390714393360088,
+      "grad_norm": 1.5767251253128052,
+      "learning_rate": 2.675915614349495e-05,
+      "loss": 1.856,
+      "step": 174000
+    },
+    {
+      "epoch": 0.0390714393360088,
+      "eval_loss": 2.1416378021240234,
+      "eval_runtime": 52.1112,
+      "eval_samples_per_second": 195.62,
+      "eval_steps_per_second": 1.535,
+      "step": 174000
+    },
+    {
+      "epoch": 0.00027908170954291995,
+      "grad_norm": 1.5513286590576172,
+      "learning_rate": 2.6737069856118284e-05,
+      "loss": 1.7542,
+      "step": 174100
+    },
+    {
+      "epoch": 0.0005581634190858399,
+      "grad_norm": 1.5664585828781128,
+      "learning_rate": 2.67149822063205e-05,
+      "loss": 1.7515,
+      "step": 174200
+    },
+    {
+      "epoch": 0.0008372451286287599,
+      "grad_norm": 1.5423948764801025,
+      "learning_rate": 2.66928932114254e-05,
+      "loss": 1.7557,
+      "step": 174300
+    },
+    {
+      "epoch": 0.0011163268381716798,
+      "grad_norm": 1.5535671710968018,
+      "learning_rate": 2.667080288875788e-05,
+      "loss": 1.7569,
+      "step": 174400
+    },
+    {
+      "epoch": 0.0013954085477146,
+      "grad_norm": 1.5592520236968994,
+      "learning_rate": 2.6648711255643828e-05,
+      "loss": 1.7506,
+      "step": 174500
+    },
+    {
+      "epoch": 0.0016744902572575198,
+      "grad_norm": 1.5440510511398315,
+      "learning_rate": 2.6626618329410198e-05,
+      "loss": 1.7618,
+      "step": 174600
+    },
+    {
+      "epoch": 0.00195357196680044,
+      "grad_norm": 1.54314124584198,
+      "learning_rate": 2.6604524127384937e-05,
+      "loss": 1.7491,
+      "step": 174700
+    },
+    {
+      "epoch": 0.0022326536763433596,
+      "grad_norm": 1.592208743095398,
+      "learning_rate": 2.658242866689702e-05,
+      "loss": 1.7458,
+      "step": 174800
+    },
+    {
+      "epoch": 0.0025117353858862797,
+      "grad_norm": 1.5204849243164062,
…
     }
   ],
   "logging_steps": 100,
@@ -13060,7 +14542,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.…
+  "total_flos": …
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null
| 13650 | 
            +
                  "learning_rate": 2.6560331965276363e-05,
         | 
| 13651 | 
            +
                  "loss": 1.7523,
         | 
| 13652 | 
            +
                  "step": 174900
         | 
| 13653 | 
            +
                },
         | 
| 13654 | 
            +
                {
         | 
| 13655 | 
            +
                  "epoch": 0.0027908170954292,
         | 
| 13656 | 
            +
                  "grad_norm": 1.5259612798690796,
         | 
| 13657 | 
            +
                  "learning_rate": 2.653823403985391e-05,
         | 
| 13658 | 
            +
                  "loss": 1.7535,
         | 
| 13659 | 
            +
                  "step": 175000
         | 
| 13660 | 
            +
                },
         | 
| 13661 | 
            +
                {
         | 
| 13662 | 
            +
                  "epoch": 0.0027908170954292,
         | 
| 13663 | 
            +
                  "eval_loss": 2.1326749324798584,
         | 
| 13664 | 
            +
                  "eval_runtime": 52.049,
         | 
| 13665 | 
            +
                  "eval_samples_per_second": 195.854,
         | 
| 13666 | 
            +
                  "eval_steps_per_second": 1.537,
         | 
| 13667 | 
            +
                  "step": 175000
         | 
| 13668 | 
            +
                },
         | 
| 13669 | 
            +
                {
         | 
| 13670 | 
            +
                  "epoch": 0.00306989880497212,
         | 
| 13671 | 
            +
                  "grad_norm": 1.52047598361969,
         | 
| 13672 | 
            +
                  "learning_rate": 2.651613490796152e-05,
         | 
| 13673 | 
            +
                  "loss": 1.7447,
         | 
| 13674 | 
            +
                  "step": 175100
         | 
| 13675 | 
            +
                },
         | 
| 13676 | 
            +
                {
         | 
| 13677 | 
            +
                  "epoch": 0.0033489805145150396,
         | 
| 13678 | 
            +
                  "grad_norm": 1.5134586095809937,
         | 
| 13679 | 
            +
                  "learning_rate": 2.6494034586932027e-05,
         | 
| 13680 | 
            +
                  "loss": 1.7452,
         | 
| 13681 | 
            +
                  "step": 175200
         | 
| 13682 | 
            +
                },
         | 
| 13683 | 
            +
                {
         | 
| 13684 | 
            +
                  "epoch": 0.0036280622240579597,
         | 
| 13685 | 
            +
                  "grad_norm": 1.572095513343811,
         | 
| 13686 | 
            +
                  "learning_rate": 2.6471933094099177e-05,
         | 
| 13687 | 
            +
                  "loss": 1.7571,
         | 
| 13688 | 
            +
                  "step": 175300
         | 
| 13689 | 
            +
                },
         | 
| 13690 | 
            +
                {
         | 
| 13691 | 
            +
                  "epoch": 0.00390714393360088,
         | 
| 13692 | 
            +
                  "grad_norm": 1.5933750867843628,
         | 
| 13693 | 
            +
                  "learning_rate": 2.6449830446797653e-05,
         | 
| 13694 | 
            +
                  "loss": 1.745,
         | 
| 13695 | 
            +
                  "step": 175400
         | 
| 13696 | 
            +
                },
         | 
| 13697 | 
            +
                {
         | 
| 13698 | 
            +
                  "epoch": 0.0041862256431437995,
         | 
| 13699 | 
            +
                  "grad_norm": 1.6601353883743286,
         | 
| 13700 | 
            +
                  "learning_rate": 2.6427726662363023e-05,
         | 
| 13701 | 
            +
                  "loss": 1.7462,
         | 
| 13702 | 
            +
                  "step": 175500
         | 
| 13703 | 
            +
                },
         | 
| 13704 | 
            +
                {
         | 
| 13705 | 
            +
                  "epoch": 0.004465307352686719,
         | 
| 13706 | 
            +
                  "grad_norm": 1.5466818809509277,
         | 
| 13707 | 
            +
                  "learning_rate": 2.640562175813177e-05,
         | 
| 13708 | 
            +
                  "loss": 1.7573,
         | 
| 13709 | 
            +
                  "step": 175600
         | 
| 13710 | 
            +
                },
         | 
| 13711 | 
            +
                {
         | 
| 13712 | 
            +
                  "epoch": 0.00474438906222964,
         | 
| 13713 | 
            +
                  "grad_norm": 1.5273200273513794,
         | 
| 13714 | 
            +
                  "learning_rate": 2.6383515751441234e-05,
         | 
| 13715 | 
            +
                  "loss": 1.7578,
         | 
| 13716 | 
            +
                  "step": 175700
         | 
| 13717 | 
            +
                },
         | 
| 13718 | 
            +
                {
         | 
| 13719 | 
            +
                  "epoch": 0.005023470771772559,
         | 
| 13720 | 
            +
                  "grad_norm": 1.609778881072998,
         | 
| 13721 | 
            +
                  "learning_rate": 2.636140865962965e-05,
         | 
| 13722 | 
            +
                  "loss": 1.7513,
         | 
| 13723 | 
            +
                  "step": 175800
         | 
| 13724 | 
            +
                },
         | 
| 13725 | 
            +
                {
         | 
| 13726 | 
            +
                  "epoch": 0.00530255248131548,
         | 
| 13727 | 
            +
                  "grad_norm": 1.6019160747528076,
         | 
| 13728 | 
            +
                  "learning_rate": 2.633930050003606e-05,
         | 
| 13729 | 
            +
                  "loss": 1.7557,
         | 
| 13730 | 
            +
                  "step": 175900
         | 
| 13731 | 
            +
                },
         | 
| 13732 | 
            +
                {
         | 
| 13733 | 
            +
                  "epoch": 0.0055816341908584,
         | 
| 13734 | 
            +
                  "grad_norm": 1.5547572374343872,
         | 
| 13735 | 
            +
                  "learning_rate": 2.6317191290000383e-05,
         | 
| 13736 | 
            +
                  "loss": 1.7645,
         | 
| 13737 | 
            +
                  "step": 176000
         | 
| 13738 | 
            +
                },
         | 
| 13739 | 
            +
                {
         | 
| 13740 | 
            +
                  "epoch": 0.0055816341908584,
         | 
| 13741 | 
            +
                  "eval_loss": 2.141494035720825,
         | 
| 13742 | 
            +
                  "eval_runtime": 51.4645,
         | 
| 13743 | 
            +
                  "eval_samples_per_second": 198.078,
         | 
| 13744 | 
            +
                  "eval_steps_per_second": 1.554,
         | 
| 13745 | 
            +
                  "step": 176000
         | 
| 13746 | 
            +
                },
         | 
| 13747 | 
            +
                {
         | 
| 13748 | 
            +
                  "epoch": 0.005860715900401319,
         | 
| 13749 | 
            +
                  "grad_norm": 1.6100679636001587,
         | 
| 13750 | 
            +
                  "learning_rate": 2.629508104686334e-05,
         | 
| 13751 | 
            +
                  "loss": 1.7566,
         | 
| 13752 | 
            +
                  "step": 176100
         | 
| 13753 | 
            +
                },
         | 
| 13754 | 
            +
                {
         | 
| 13755 | 
            +
                  "epoch": 0.00613979760994424,
         | 
| 13756 | 
            +
                  "grad_norm": 1.5966265201568604,
         | 
| 13757 | 
            +
                  "learning_rate": 2.6272969787966466e-05,
         | 
| 13758 | 
            +
                  "loss": 1.7511,
         | 
| 13759 | 
            +
                  "step": 176200
         | 
| 13760 | 
            +
                },
         | 
| 13761 | 
            +
                {
         | 
| 13762 | 
            +
                  "epoch": 0.0064188793194871595,
         | 
| 13763 | 
            +
                  "grad_norm": 1.5519967079162598,
         | 
| 13764 | 
            +
                  "learning_rate": 2.6250857530652113e-05,
         | 
| 13765 | 
            +
                  "loss": 1.7534,
         | 
| 13766 | 
            +
                  "step": 176300
         | 
| 13767 | 
            +
                },
         | 
| 13768 | 
            +
                {
         | 
| 13769 | 
            +
                  "epoch": 0.006697961029030079,
         | 
| 13770 | 
            +
                  "grad_norm": 1.5537617206573486,
         | 
| 13771 | 
            +
                  "learning_rate": 2.6228744292263367e-05,
         | 
| 13772 | 
            +
                  "loss": 1.7448,
         | 
| 13773 | 
            +
                  "step": 176400
         | 
| 13774 | 
            +
                },
         | 
| 13775 | 
            +
                {
         | 
| 13776 | 
            +
                  "epoch": 0.006977042738573,
         | 
| 13777 | 
            +
                  "grad_norm": 1.5397429466247559,
         | 
| 13778 | 
            +
                  "learning_rate": 2.6206630090144153e-05,
         | 
| 13779 | 
            +
                  "loss": 1.7456,
         | 
| 13780 | 
            +
                  "step": 176500
         | 
| 13781 | 
            +
                },
         | 
| 13782 | 
            +
                {
         | 
| 13783 | 
            +
                  "epoch": 0.0072561244481159195,
         | 
| 13784 | 
            +
                  "grad_norm": 1.5131994485855103,
         | 
| 13785 | 
            +
                  "learning_rate": 2.618451494163908e-05,
         | 
| 13786 | 
            +
                  "loss": 1.7472,
         | 
| 13787 | 
            +
                  "step": 176600
         | 
| 13788 | 
            +
                },
         | 
| 13789 | 
            +
                {
         | 
| 13790 | 
            +
                  "epoch": 0.007535206157658839,
         | 
| 13791 | 
            +
                  "grad_norm": 1.553226113319397,
         | 
| 13792 | 
            +
                  "learning_rate": 2.6162398864093553e-05,
         | 
| 13793 | 
            +
                  "loss": 1.7588,
         | 
| 13794 | 
            +
                  "step": 176700
         | 
| 13795 | 
            +
                },
         | 
| 13796 | 
            +
                {
         | 
| 13797 | 
            +
                  "epoch": 0.00781428786720176,
         | 
| 13798 | 
            +
                  "grad_norm": 1.5782634019851685,
         | 
| 13799 | 
            +
                  "learning_rate": 2.6140281874853666e-05,
         | 
| 13800 | 
            +
                  "loss": 1.7498,
         | 
| 13801 | 
            +
                  "step": 176800
         | 
| 13802 | 
            +
                },
         | 
| 13803 | 
            +
                {
         | 
| 13804 | 
            +
                  "epoch": 0.00809336957674468,
         | 
| 13805 | 
            +
                  "grad_norm": 1.5181629657745361,
         | 
| 13806 | 
            +
                  "learning_rate": 2.6118163991266275e-05,
         | 
| 13807 | 
            +
                  "loss": 1.7525,
         | 
| 13808 | 
            +
                  "step": 176900
         | 
| 13809 | 
            +
                },
         | 
| 13810 | 
            +
                {
         | 
| 13811 | 
            +
                  "epoch": 0.008372451286287599,
         | 
| 13812 | 
            +
                  "grad_norm": 1.622118353843689,
         | 
| 13813 | 
            +
                  "learning_rate": 2.6096045230678888e-05,
         | 
| 13814 | 
            +
                  "loss": 1.7472,
         | 
| 13815 | 
            +
                  "step": 177000
         | 
| 13816 | 
            +
                },
         | 
| 13817 | 
            +
                {
         | 
| 13818 | 
            +
                  "epoch": 0.008372451286287599,
         | 
| 13819 | 
            +
                  "eval_loss": 2.1567530632019043,
         | 
| 13820 | 
            +
                  "eval_runtime": 51.4987,
         | 
| 13821 | 
            +
                  "eval_samples_per_second": 197.947,
         | 
| 13822 | 
            +
                  "eval_steps_per_second": 1.553,
         | 
| 13823 | 
            +
                  "step": 177000
         | 
| 13824 | 
            +
                },
         | 
| 13825 | 
            +
                {
         | 
| 13826 | 
            +
                  "epoch": 0.008651532995830519,
         | 
| 13827 | 
            +
                  "grad_norm": 1.5844262838363647,
         | 
| 13828 | 
            +
                  "learning_rate": 2.6073925610439738e-05,
         | 
| 13829 | 
            +
                  "loss": 1.7489,
         | 
| 13830 | 
            +
                  "step": 177100
         | 
| 13831 | 
            +
                },
         | 
| 13832 | 
            +
                {
         | 
| 13833 | 
            +
                  "epoch": 0.008930614705373438,
         | 
| 13834 | 
            +
                  "grad_norm": 1.4944721460342407,
         | 
| 13835 | 
            +
                  "learning_rate": 2.6051805147897713e-05,
         | 
| 13836 | 
            +
                  "loss": 1.7535,
         | 
| 13837 | 
            +
                  "step": 177200
         | 
| 13838 | 
            +
                },
         | 
| 13839 | 
            +
                {
         | 
| 13840 | 
            +
                  "epoch": 0.00920969641491636,
         | 
| 13841 | 
            +
                  "grad_norm": 1.607365608215332,
         | 
| 13842 | 
            +
                  "learning_rate": 2.602968386040236e-05,
         | 
| 13843 | 
            +
                  "loss": 1.7476,
         | 
| 13844 | 
            +
                  "step": 177300
         | 
| 13845 | 
            +
                },
         | 
| 13846 | 
            +
                {
         | 
| 13847 | 
            +
                  "epoch": 0.00948877812445928,
         | 
| 13848 | 
            +
                  "grad_norm": 1.5790349245071411,
         | 
| 13849 | 
            +
                  "learning_rate": 2.6007561765303878e-05,
         | 
| 13850 | 
            +
                  "loss": 1.7465,
         | 
| 13851 | 
            +
                  "step": 177400
         | 
| 13852 | 
            +
                },
         | 
| 13853 | 
            +
                {
         | 
| 13854 | 
            +
                  "epoch": 0.0097678598340022,
         | 
| 13855 | 
            +
                  "grad_norm": 1.5833547115325928,
         | 
| 13856 | 
            +
                  "learning_rate": 2.5985438879953107e-05,
         | 
| 13857 | 
            +
                  "loss": 1.7581,
         | 
| 13858 | 
            +
                  "step": 177500
         | 
| 13859 | 
            +
                },
         | 
| 13860 | 
            +
                {
         | 
| 13861 | 
            +
                  "epoch": 0.010046941543545119,
         | 
| 13862 | 
            +
                  "grad_norm": 1.5244640111923218,
         | 
| 13863 | 
            +
                  "learning_rate": 2.5963315221701496e-05,
         | 
| 13864 | 
            +
                  "loss": 1.7489,
         | 
| 13865 | 
            +
                  "step": 177600
         | 
| 13866 | 
            +
                },
         | 
| 13867 | 
            +
                {
         | 
| 13868 | 
            +
                  "epoch": 0.010326023253088039,
         | 
| 13869 | 
            +
                  "grad_norm": 1.6332496404647827,
         | 
| 13870 | 
            +
                  "learning_rate": 2.5941190807901117e-05,
         | 
| 13871 | 
            +
                  "loss": 1.7593,
         | 
| 13872 | 
            +
                  "step": 177700
         | 
| 13873 | 
            +
                },
         | 
| 13874 | 
            +
                {
         | 
| 13875 | 
            +
                  "epoch": 0.01060510496263096,
         | 
| 13876 | 
            +
                  "grad_norm": 1.4967930316925049,
         | 
| 13877 | 
            +
                  "learning_rate": 2.5919065655904606e-05,
         | 
| 13878 | 
            +
                  "loss": 1.7487,
         | 
| 13879 | 
            +
                  "step": 177800
         | 
| 13880 | 
            +
                },
         | 
| 13881 | 
            +
                {
         | 
| 13882 | 
            +
                  "epoch": 0.01088418667217388,
         | 
| 13883 | 
            +
                  "grad_norm": 1.5874158143997192,
         | 
| 13884 | 
            +
                  "learning_rate": 2.5896939783065198e-05,
         | 
| 13885 | 
            +
                  "loss": 1.7488,
         | 
| 13886 | 
            +
                  "step": 177900
         | 
| 13887 | 
            +
                },
         | 
| 13888 | 
            +
                {
         | 
| 13889 | 
            +
                  "epoch": 0.0111632683817168,
         | 
| 13890 | 
            +
                  "grad_norm": 1.6334315538406372,
         | 
| 13891 | 
            +
                  "learning_rate": 2.587481320673669e-05,
         | 
| 13892 | 
            +
                  "loss": 1.7558,
         | 
| 13893 | 
            +
                  "step": 178000
         | 
| 13894 | 
            +
                },
         | 
| 13895 | 
            +
                {
         | 
| 13896 | 
            +
                  "epoch": 0.0111632683817168,
         | 
| 13897 | 
            +
                  "eval_loss": 2.1407663822174072,
         | 
| 13898 | 
            +
                  "eval_runtime": 51.564,
         | 
| 13899 | 
            +
                  "eval_samples_per_second": 197.696,
         | 
| 13900 | 
            +
                  "eval_steps_per_second": 1.551,
         | 
| 13901 | 
            +
                  "step": 178000
         | 
| 13902 | 
            +
                },
         | 
| 13903 | 
            +
                {
         | 
| 13904 | 
            +
                  "epoch": 0.011442350091259719,
         | 
| 13905 | 
            +
                  "grad_norm": 1.5070706605911255,
         | 
| 13906 | 
            +
                  "learning_rate": 2.5852685944273437e-05,
         | 
| 13907 | 
            +
                  "loss": 1.7515,
         | 
| 13908 | 
            +
                  "step": 178100
         | 
| 13909 | 
            +
                },
         | 
| 13910 | 
            +
                {
         | 
| 13911 | 
            +
                  "epoch": 0.011721431800802639,
         | 
| 13912 | 
            +
                  "grad_norm": 1.675197958946228,
         | 
| 13913 | 
            +
                  "learning_rate": 2.583055801303031e-05,
         | 
| 13914 | 
            +
                  "loss": 1.7517,
         | 
| 13915 | 
            +
                  "step": 178200
         | 
| 13916 | 
            +
                },
         | 
| 13917 | 
            +
                {
         | 
| 13918 | 
            +
                  "epoch": 0.012000513510345558,
         | 
| 13919 | 
            +
                  "grad_norm": 1.6129719018936157,
         | 
| 13920 | 
            +
                  "learning_rate": 2.5808429430362734e-05,
         | 
| 13921 | 
            +
                  "loss": 1.739,
         | 
| 13922 | 
            +
                  "step": 178300
         | 
| 13923 | 
            +
                },
         | 
| 13924 | 
            +
                {
         | 
| 13925 | 
            +
                  "epoch": 0.01227959521988848,
         | 
| 13926 | 
            +
                  "grad_norm": 1.6314342021942139,
         | 
| 13927 | 
            +
                  "learning_rate": 2.5786300213626623e-05,
         | 
| 13928 | 
            +
                  "loss": 1.7373,
         | 
| 13929 | 
            +
                  "step": 178400
         | 
| 13930 | 
            +
                },
         | 
| 13931 | 
            +
                {
         | 
| 13932 | 
            +
                  "epoch": 0.0125586769294314,
         | 
| 13933 | 
            +
                  "grad_norm": 1.4758597612380981,
         | 
| 13934 | 
            +
                  "learning_rate": 2.576417038017841e-05,
         | 
| 13935 | 
            +
                  "loss": 1.7512,
         | 
| 13936 | 
            +
                  "step": 178500
         | 
| 13937 | 
            +
                },
         | 
| 13938 | 
            +
                {
         | 
| 13939 | 
            +
                  "epoch": 0.012837758638974319,
         | 
| 13940 | 
            +
                  "grad_norm": 1.6322437524795532,
         | 
| 13941 | 
            +
                  "learning_rate": 2.574203994737498e-05,
         | 
| 13942 | 
            +
                  "loss": 1.7529,
         | 
| 13943 | 
            +
                  "step": 178600
         | 
| 13944 | 
            +
                },
         | 
| 13945 | 
            +
                {
         | 
| 13946 | 
            +
                  "epoch": 0.013116840348517239,
         | 
| 13947 | 
            +
                  "grad_norm": 1.6611186265945435,
         | 
| 13948 | 
            +
                  "learning_rate": 2.5719908932573716e-05,
         | 
| 13949 | 
            +
                  "loss": 1.7529,
         | 
| 13950 | 
            +
                  "step": 178700
         | 
| 13951 | 
            +
                },
         | 
| 13952 | 
            +
                {
         | 
| 13953 | 
            +
                  "epoch": 0.013395922058060158,
         | 
| 13954 | 
            +
                  "grad_norm": 1.6254630088806152,
         | 
| 13955 | 
            +
                  "learning_rate": 2.5697777353132434e-05,
         | 
| 13956 | 
            +
                  "loss": 1.7548,
         | 
| 13957 | 
            +
                  "step": 178800
         | 
| 13958 | 
            +
                },
         | 
| 13959 | 
            +
                {
         | 
| 13960 | 
            +
                  "epoch": 0.013675003767603078,
         | 
| 13961 | 
            +
                  "grad_norm": 1.6417994499206543,
         | 
| 13962 | 
            +
                  "learning_rate": 2.567564522640942e-05,
         | 
| 13963 | 
            +
                  "loss": 1.7501,
         | 
| 13964 | 
            +
                  "step": 178900
         | 
| 13965 | 
            +
                },
         | 
| 13966 | 
            +
                {
         | 
| 13967 | 
            +
                  "epoch": 0.013954085477146,
         | 
| 13968 | 
            +
                  "grad_norm": 1.5359156131744385,
         | 
| 13969 | 
            +
                  "learning_rate": 2.5653512569763377e-05,
         | 
| 13970 | 
            +
                  "loss": 1.7562,
         | 
| 13971 | 
            +
                  "step": 179000
         | 
| 13972 | 
            +
                },
         | 
| 13973 | 
            +
                {
         | 
| 13974 | 
            +
                  "epoch": 0.013954085477146,
         | 
| 13975 | 
            +
                  "eval_loss": 2.144591808319092,
         | 
| 13976 | 
            +
                  "eval_runtime": 51.5364,
         | 
| 13977 | 
            +
                  "eval_samples_per_second": 197.802,
         | 
| 13978 | 
            +
                  "eval_steps_per_second": 1.552,
         | 
| 13979 | 
            +
                  "step": 179000
         | 
| 13980 | 
            +
                },
         | 
| 13981 | 
            +
                {
         | 
| 13982 | 
            +
                  "epoch": 0.01423316718668892,
         | 
| 13983 | 
            +
                  "grad_norm": 1.5880595445632935,
         | 
| 13984 | 
            +
                  "learning_rate": 2.5631379400553416e-05,
         | 
| 13985 | 
            +
                  "loss": 1.75,
         | 
| 13986 | 
            +
                  "step": 179100
         | 
| 13987 | 
            +
                },
         | 
| 13988 | 
            +
                {
         | 
| 13989 | 
            +
                  "epoch": 0.014512248896231839,
         | 
| 13990 | 
            +
                  "grad_norm": 1.6134679317474365,
         | 
| 13991 | 
            +
                  "learning_rate": 2.560924573613906e-05,
         | 
| 13992 | 
            +
                  "loss": 1.7508,
         | 
| 13993 | 
            +
                  "step": 179200
         | 
| 13994 | 
            +
                },
         | 
| 13995 | 
            +
                {
         | 
| 13996 | 
            +
                  "epoch": 0.014791330605774759,
         | 
| 13997 | 
            +
                  "grad_norm": 1.5464352369308472,
         | 
| 13998 | 
            +
                  "learning_rate": 2.5587111593880205e-05,
         | 
| 13999 | 
            +
                  "loss": 1.7502,
         | 
| 14000 | 
            +
                  "step": 179300
         | 
| 14001 | 
            +
                },
         | 
| 14002 | 
            +
                {
         | 
| 14003 | 
            +
                  "epoch": 0.015070412315317678,
         | 
| 14004 | 
            +
                  "grad_norm": 1.573649525642395,
         | 
| 14005 | 
            +
                  "learning_rate": 2.556497699113714e-05,
         | 
| 14006 | 
            +
                  "loss": 1.7435,
         | 
| 14007 | 
            +
                  "step": 179400
         | 
| 14008 | 
            +
                },
         | 
| 14009 | 
            +
                {
         | 
| 14010 | 
            +
                  "epoch": 0.015349494024860598,
         | 
| 14011 | 
            +
                  "grad_norm": 1.5665711164474487,
         | 
| 14012 | 
            +
                  "learning_rate": 2.554284194527051e-05,
         | 
| 14013 | 
            +
                  "loss": 1.7462,
         | 
| 14014 | 
            +
                  "step": 179500
         | 
| 14015 | 
            +
                },
         | 
| 14016 | 
            +
                {
         | 
| 14017 | 
            +
                  "epoch": 0.01562857573440352,
         | 
| 14018 | 
            +
                  "grad_norm": 1.606072187423706,
         | 
| 14019 | 
            +
                  "learning_rate": 2.5520706473641316e-05,
         | 
| 14020 | 
            +
                  "loss": 1.7516,
         | 
| 14021 | 
            +
                  "step": 179600
         | 
| 14022 | 
            +
                },
         | 
| 14023 | 
            +
                {
         | 
| 14024 | 
            +
                  "epoch": 0.015907657443946437,
         | 
| 14025 | 
            +
                  "grad_norm": 1.5898959636688232,
         | 
| 14026 | 
            +
                  "learning_rate": 2.549857059361086e-05,
         | 
| 14027 | 
            +
                  "loss": 1.7482,
         | 
| 14028 | 
            +
                  "step": 179700
         | 
| 14029 | 
            +
                },
         | 
| 14030 | 
            +
                {
         | 
| 14031 | 
            +
                  "epoch": 0.01618673915348936,
         | 
| 14032 | 
            +
                  "grad_norm": 1.6288598775863647,
         | 
| 14033 | 
            +
                  "learning_rate": 2.547643432254081e-05,
         | 
| 14034 | 
            +
                  "loss": 1.7365,
         | 
| 14035 | 
            +
                  "step": 179800
         | 
| 14036 | 
            +
                },
         | 
| 14037 | 
            +
                {
         | 
| 14038 | 
            +
                  "epoch": 0.01646582086303228,
         | 
| 14039 | 
            +
                  "grad_norm": 1.5765552520751953,
         | 
| 14040 | 
            +
                  "learning_rate": 2.545429767779311e-05,
         | 
| 14041 | 
            +
                  "loss": 1.7346,
         | 
| 14042 | 
            +
                  "step": 179900
         | 
| 14043 | 
            +
                },
         | 
| 14044 | 
            +
                {
         | 
| 14045 | 
            +
                  "epoch": 0.016744902572575198,
         | 
| 14046 | 
            +
                  "grad_norm": 1.5909677743911743,
         | 
| 14047 | 
            +
                  "learning_rate": 2.5432160676729994e-05,
         | 
| 14048 | 
            +
                  "loss": 1.7493,
         | 
| 14049 | 
            +
                  "step": 180000
         | 
| 14050 | 
            +
                },
         | 
| 14051 | 
            +
                {
         | 
| 14052 | 
            +
                  "epoch": 0.016744902572575198,
         | 
| 14053 | 
            +
                  "eval_loss": 2.1469063758850098,
         | 
| 14054 | 
            +
                  "eval_runtime": 52.5101,
         | 
| 14055 | 
            +
                  "eval_samples_per_second": 194.134,
         | 
| 14056 | 
            +
                  "eval_steps_per_second": 1.524,
         | 
| 14057 | 
            +
                  "step": 180000
         | 
| 14058 | 
            +
                },
         | 
| 14059 | 
            +
                {
         | 
| 14060 | 
            +
                  "epoch": 0.01702398428211812,
         | 
| 14061 | 
            +
                  "grad_norm": 1.6108888387680054,
         | 
| 14062 | 
            +
                  "learning_rate": 2.5410023336713996e-05,
         | 
| 14063 | 
            +
                  "loss": 1.749,
         | 
| 14064 | 
            +
                  "step": 180100
         | 
| 14065 | 
            +
                },
         | 
| 14066 | 
            +
                {
         | 
| 14067 | 
            +
                  "epoch": 0.017303065991661037,
         | 
| 14068 | 
            +
                  "grad_norm": 1.5427972078323364,
         | 
| 14069 | 
            +
                  "learning_rate": 2.538788567510791e-05,
         | 
| 14070 | 
            +
                  "loss": 1.738,
         | 
| 14071 | 
            +
                  "step": 180200
         | 
| 14072 | 
            +
                },
         | 
| 14073 | 
            +
                {
         | 
| 14074 | 
            +
                  "epoch": 0.01758214770120396,
         | 
| 14075 | 
            +
                  "grad_norm": 1.5925029516220093,
         | 
| 14076 | 
            +
                  "learning_rate": 2.5365747709274767e-05,
         | 
| 14077 | 
            +
                  "loss": 1.7418,
         | 
| 14078 | 
            +
                  "step": 180300
         | 
| 14079 | 
            +
                },
         | 
| 14080 | 
            +
                {
         | 
| 14081 | 
            +
                  "epoch": 0.017861229410746877,
         | 
| 14082 | 
            +
                  "grad_norm": 1.5784283876419067,
         | 
| 14083 | 
            +
                  "learning_rate": 2.5343609456577867e-05,
         | 
| 14084 | 
            +
                  "loss": 1.7417,
         | 
| 14085 | 
            +
                  "step": 180400
         | 
| 14086 | 
            +
                },
         | 
| 14087 | 
            +
                {
         | 
| 14088 | 
            +
                  "epoch": 0.018140311120289798,
         | 
| 14089 | 
            +
                  "grad_norm": 1.623561978340149,
         | 
| 14090 | 
            +
                  "learning_rate": 2.53214709343807e-05,
         | 
| 14091 | 
            +
                  "loss": 1.7443,
         | 
| 14092 | 
            +
                  "step": 180500
         | 
| 14093 | 
            +
                },
         | 
| 14094 | 
            +
                {
         | 
| 14095 | 
            +
                  "epoch": 0.01841939282983272,
         | 
| 14096 | 
            +
                  "grad_norm": 1.6505674123764038,
         | 
| 14097 | 
            +
                  "learning_rate": 2.5299332160046985e-05,
         | 
| 14098 | 
            +
                  "loss": 1.7454,
         | 
| 14099 | 
            +
                  "step": 180600
         | 
| 14100 | 
            +
                },
         | 
| 14101 | 
            +
                {
         | 
| 14102 | 
            +
                  "epoch": 0.018698474539375638,
         | 
| 14103 | 
            +
                  "grad_norm": 1.5555040836334229,
         | 
| 14104 | 
            +
                  "learning_rate": 2.5277193150940638e-05,
         | 
| 14105 | 
            +
                  "loss": 1.7416,
         | 
| 14106 | 
            +
                  "step": 180700
         | 
| 14107 | 
            +
                },
         | 
| 14108 | 
            +
                {
         | 
| 14109 | 
            +
                  "epoch": 0.01897755624891856,
         | 
| 14110 | 
            +
                  "grad_norm": 1.6162723302841187,
         | 
| 14111 | 
            +
                  "learning_rate": 2.525505392442577e-05,
         | 
| 14112 | 
            +
                  "loss": 1.7433,
         | 
| 14113 | 
            +
                  "step": 180800
         | 
| 14114 | 
            +
                },
         | 
| 14115 | 
            +
                {
         | 
| 14116 | 
            +
                  "epoch": 0.019256637958461477,
         | 
| 14117 | 
            +
                  "grad_norm": 1.5440572500228882,
         | 
| 14118 | 
            +
                  "learning_rate": 2.523291449786663e-05,
         | 
| 14119 | 
            +
                  "loss": 1.7438,
         | 
| 14120 | 
            +
                  "step": 180900
         | 
| 14121 | 
            +
                },
         | 
| 14122 | 
            +
                {
         | 
| 14123 | 
            +
                  "epoch": 0.0195357196680044,
         | 
| 14124 | 
            +
                  "grad_norm": 1.596146583557129,
         | 
| 14125 | 
            +
                  "learning_rate": 2.5210774888627664e-05,
         | 
| 14126 | 
            +
                  "loss": 1.7425,
         | 
| 14127 | 
            +
                  "step": 181000
         | 
| 14128 | 
            +
                },
         | 
| 14129 | 
            +
                {
         | 
| 14130 | 
            +
                  "epoch": 0.0195357196680044,
         | 
| 14131 | 
            +
                  "eval_loss": 2.140672206878662,
         | 
| 14132 | 
            +
                  "eval_runtime": 51.8004,
         | 
| 14133 | 
            +
                  "eval_samples_per_second": 196.794,
         | 
| 14134 | 
            +
                  "eval_steps_per_second": 1.544,
         | 
| 14135 | 
            +
                  "step": 181000
         | 
| 14136 | 
            +
                },
         | 
| 14137 | 
            +
                {
         | 
| 14138 | 
            +
                  "epoch": 0.01981480137754732,
         | 
| 14139 | 
            +
                  "grad_norm": 1.6086748838424683,
         | 
| 14140 | 
            +
                  "learning_rate": 2.5188635114073434e-05,
         | 
| 14141 | 
            +
                  "loss": 1.7488,
         | 
| 14142 | 
            +
                  "step": 181100
         | 
| 14143 | 
            +
                },
         | 
| 14144 | 
            +
                {
         | 
| 14145 | 
            +
                  "epoch": 0.020093883087090238,
         | 
| 14146 | 
            +
                  "grad_norm": 1.564663290977478,
         | 
| 14147 | 
            +
                  "learning_rate": 2.516649519156864e-05,
         | 
| 14148 | 
            +
                  "loss": 1.7452,
         | 
| 14149 | 
            +
                  "step": 181200
         | 
| 14150 | 
            +
                },
         | 
| 14151 | 
            +
                {
         | 
| 14152 | 
            +
                  "epoch": 0.02037296479663316,
         | 
| 14153 | 
            +
                  "grad_norm": 1.5975944995880127,
         | 
| 14154 | 
            +
                  "learning_rate": 2.51443551384781e-05,
         | 
| 14155 | 
            +
                  "loss": 1.7419,
         | 
| 14156 | 
            +
                  "step": 181300
         | 
| 14157 | 
            +
                },
         | 
| 14158 | 
            +
                {
         | 
| 14159 | 
            +
                  "epoch": 0.020652046506176077,
         | 
| 14160 | 
            +
                  "grad_norm": 1.6056960821151733,
         | 
| 14161 | 
            +
                  "learning_rate": 2.5122214972166724e-05,
         | 
| 14162 | 
            +
                  "loss": 1.7536,
         | 
| 14163 | 
            +
                  "step": 181400
         | 
| 14164 | 
            +
                },
         | 
| 14165 | 
            +
                {
         | 
| 14166 | 
            +
                  "epoch": 0.020931128215719,
         | 
| 14167 | 
            +
                  "grad_norm": 1.6348010301589966,
         | 
| 14168 | 
            +
                  "learning_rate": 2.5100074709999526e-05,
         | 
| 14169 | 
            +
                  "loss": 1.7505,
         | 
| 14170 | 
            +
                  "step": 181500
         | 
| 14171 | 
            +
                },
         | 
| 14172 | 
            +
                {
         | 
| 14173 | 
            +
                  "epoch": 0.02121020992526192,
         | 
| 14174 | 
            +
                  "grad_norm": 1.4651880264282227,
         | 
| 14175 | 
            +
                  "learning_rate": 2.5077934369341594e-05,
         | 
| 14176 | 
            +
                  "loss": 1.7474,
         | 
| 14177 | 
            +
                  "step": 181600
         | 
| 14178 | 
            +
                },
         | 
| 14179 | 
            +
                {
         | 
| 14180 | 
            +
                  "epoch": 0.021489291634804838,
         | 
| 14181 | 
            +
                  "grad_norm": 1.6000345945358276,
         | 
| 14182 | 
            +
                  "learning_rate": 2.505579396755806e-05,
         | 
| 14183 | 
            +
                  "loss": 1.7455,
         | 
| 14184 | 
            +
                  "step": 181700
         | 
| 14185 | 
            +
                },
         | 
| 14186 | 
            +
                {
         | 
| 14187 | 
            +
                  "epoch": 0.02176837334434776,
         | 
| 14188 | 
            +
                  "grad_norm": 1.6549137830734253,
         | 
| 14189 | 
            +
                  "learning_rate": 2.503365352201413e-05,
         | 
| 14190 | 
            +
                  "loss": 1.7404,
         | 
| 14191 | 
            +
                  "step": 181800
         | 
| 14192 | 
            +
                },
         | 
| 14193 | 
            +
                {
         | 
| 14194 | 
            +
                  "epoch": 0.022047455053890677,
         | 
| 14195 | 
            +
                  "grad_norm": 1.6172484159469604,
         | 
| 14196 | 
            +
                  "learning_rate": 2.5011513050075014e-05,
         | 
| 14197 | 
            +
                  "loss": 1.7457,
         | 
| 14198 | 
            +
                  "step": 181900
         | 
| 14199 | 
            +
                },
         | 
| 14200 | 
            +
                {
         | 
| 14201 | 
            +
                  "epoch": 0.0223265367634336,
         | 
| 14202 | 
            +
                  "grad_norm": 1.6283797025680542,
         | 
| 14203 | 
            +
                  "learning_rate": 2.4989372569105962e-05,
         | 
| 14204 | 
            +
                  "loss": 1.7411,
         | 
| 14205 | 
            +
                  "step": 182000
         | 
| 14206 | 
            +
                },
         | 
| 14207 | 
            +
                {
         | 
| 14208 | 
            +
                  "epoch": 0.0223265367634336,
         | 
| 14209 | 
            +
                  "eval_loss": 2.1432528495788574,
         | 
| 14210 | 
            +
                  "eval_runtime": 51.7742,
         | 
| 14211 | 
            +
                  "eval_samples_per_second": 196.894,
         | 
| 14212 | 
            +
                  "eval_steps_per_second": 1.545,
         | 
| 14213 | 
            +
                  "step": 182000
         | 
| 14214 | 
            +
                },
         | 
| 14215 | 
            +
                {
         | 
| 14216 | 
            +
                  "epoch": 0.022605618472976517,
         | 
| 14217 | 
            +
                  "grad_norm": 1.5319279432296753,
         | 
| 14218 | 
            +
                  "learning_rate": 2.4967232096472236e-05,
         | 
| 14219 | 
            +
                  "loss": 1.76,
         | 
| 14220 | 
            +
                  "step": 182100
         | 
| 14221 | 
            +
                },
         | 
| 14222 | 
            +
                {
         | 
| 14223 | 
            +
                  "epoch": 0.022884700182519438,
         | 
| 14224 | 
            +
                  "grad_norm": 1.600860595703125,
         | 
| 14225 | 
            +
                  "learning_rate": 2.4945091649539086e-05,
         | 
| 14226 | 
            +
                  "loss": 1.7416,
         | 
| 14227 | 
            +
                  "step": 182200
         | 
| 14228 | 
            +
                },
         | 
| 14229 | 
            +
                {
         | 
| 14230 | 
            +
                  "epoch": 0.02316378189206236,
         | 
| 14231 | 
            +
                  "grad_norm": 1.5592856407165527,
         | 
| 14232 | 
            +
                  "learning_rate": 2.4922951245671723e-05,
         | 
| 14233 | 
            +
                  "loss": 1.7421,
         | 
| 14234 | 
            +
                  "step": 182300
         | 
| 14235 | 
            +
                },
         | 
| 14236 | 
            +
                {
         | 
| 14237 | 
            +
                  "epoch": 0.023442863601605277,
         | 
| 14238 | 
            +
                  "grad_norm": 1.5361909866333008,
         | 
| 14239 | 
            +
                  "learning_rate": 2.4900810902235356e-05,
         | 
| 14240 | 
            +
                  "loss": 1.7436,
         | 
| 14241 | 
            +
                  "step": 182400
         | 
| 14242 | 
            +
                },
         | 
| 14243 | 
            +
                {
         | 
| 14244 | 
            +
                  "epoch": 0.0237219453111482,
         | 
| 14245 | 
            +
                  "grad_norm": 1.526672124862671,
         | 
| 14246 | 
            +
                  "learning_rate": 2.4878670636595117e-05,
         | 
| 14247 | 
            +
                  "loss": 1.7418,
         | 
| 14248 | 
            +
                  "step": 182500
         | 
| 14249 | 
            +
                },
         | 
| 14250 | 
            +
                {
         | 
| 14251 | 
            +
                  "epoch": 0.024001027020691117,
         | 
| 14252 | 
            +
                  "grad_norm": 1.5167595148086548,
         | 
| 14253 | 
            +
                  "learning_rate": 2.4856530466116112e-05,
         | 
| 14254 | 
            +
                  "loss": 1.7389,
         | 
| 14255 | 
            +
                  "step": 182600
         | 
| 14256 | 
            +
                },
         | 
| 14257 | 
            +
                {
         | 
| 14258 | 
            +
                  "epoch": 0.024280108730234038,
         | 
| 14259 | 
            +
                  "grad_norm": 1.6046936511993408,
         | 
| 14260 | 
            +
                  "learning_rate": 2.4834390408163324e-05,
         | 
| 14261 | 
            +
                  "loss": 1.7459,
         | 
| 14262 | 
            +
                  "step": 182700
         | 
| 14263 | 
            +
                },
         | 
| 14264 | 
            +
                {
         | 
| 14265 | 
            +
                  "epoch": 0.02455919043977696,
         | 
| 14266 | 
            +
                  "grad_norm": 1.572601079940796,
         | 
| 14267 | 
            +
                  "learning_rate": 2.4812250480101693e-05,
         | 
| 14268 | 
            +
                  "loss": 1.7464,
         | 
| 14269 | 
            +
                  "step": 182800
         | 
| 14270 | 
            +
                },
         | 
| 14271 | 
            +
                {
         | 
| 14272 | 
            +
                  "epoch": 0.024838272149319877,
         | 
| 14273 | 
            +
                  "grad_norm": 1.5549017190933228,
         | 
| 14274 | 
            +
                  "learning_rate": 2.479011069929603e-05,
         | 
| 14275 | 
            +
                  "loss": 1.7356,
         | 
| 14276 | 
            +
                  "step": 182900
         | 
| 14277 | 
            +
                },
         | 
| 14278 | 
            +
                {
         | 
| 14279 | 
            +
                  "epoch": 0.0251173538588628,
         | 
| 14280 | 
            +
                  "grad_norm": 1.5163230895996094,
         | 
| 14281 | 
            +
                  "learning_rate": 2.476797108311106e-05,
         | 
| 14282 | 
            +
                  "loss": 1.7427,
         | 
| 14283 | 
            +
                  "step": 183000
         | 
| 14284 | 
            +
                },
         | 
| 14285 | 
            +
                {
         | 
| 14286 | 
            +
                  "epoch": 0.0251173538588628,
         | 
| 14287 | 
            +
                  "eval_loss": 2.1313729286193848,
         | 
| 14288 | 
            +
                  "eval_runtime": 51.744,
         | 
| 14289 | 
            +
                  "eval_samples_per_second": 197.009,
         | 
| 14290 | 
            +
                  "eval_steps_per_second": 1.546,
         | 
| 14291 | 
            +
                  "step": 183000
         | 
| 14292 | 
            +
                },
         | 
| 14293 | 
            +
                {
         | 
| 14294 | 
            +
                  "epoch": 0.025396435568405717,
         | 
| 14295 | 
            +
                  "grad_norm": 1.5936397314071655,
         | 
| 14296 | 
            +
                  "learning_rate": 2.474583164891133e-05,
         | 
| 14297 | 
            +
                  "loss": 1.7446,
         | 
| 14298 | 
            +
                  "step": 183100
         | 
| 14299 | 
            +
                },
         | 
| 14300 | 
            +
                {
         | 
| 14301 | 
            +
                  "epoch": 0.025675517277948638,
         | 
| 14302 | 
            +
                  "grad_norm": 1.5533971786499023,
         | 
| 14303 | 
            +
                  "learning_rate": 2.4723692414061295e-05,
         | 
| 14304 | 
            +
                  "loss": 1.7452,
         | 
| 14305 | 
            +
                  "step": 183200
         | 
| 14306 | 
            +
                },
         | 
| 14307 | 
            +
                {
         | 
| 14308 | 
            +
                  "epoch": 0.025954598987491556,
         | 
| 14309 | 
            +
                  "grad_norm": 1.6152623891830444,
         | 
| 14310 | 
            +
                  "learning_rate": 2.4701553395925214e-05,
         | 
| 14311 | 
            +
                  "loss": 1.7425,
         | 
| 14312 | 
            +
                  "step": 183300
         | 
| 14313 | 
            +
                },
         | 
| 14314 | 
            +
                {
         | 
| 14315 | 
            +
                  "epoch": 0.026233680697034478,
         | 
| 14316 | 
            +
                  "grad_norm": 1.4908332824707031,
         | 
+      "learning_rate": 2.4679414611867214e-05,
+      "loss": 1.755,
+      "step": 183400
+    },
+    {
+      "epoch": 0.0265127624065774,
+      "grad_norm": 1.6560674905776978,
+      "learning_rate": 2.4657276079251194e-05,
+      "loss": 1.7477,
+      "step": 183500
+    },
+    {
+      "epoch": 0.026791844116120317,
+      "grad_norm": 1.7160277366638184,
+      "learning_rate": 2.4635137815440894e-05,
+      "loss": 1.7446,
+      "step": 183600
+    },
+    {
+      "epoch": 0.02707092582566324,
+      "grad_norm": 1.4447243213653564,
+      "learning_rate": 2.461299983779983e-05,
+      "loss": 1.7403,
+      "step": 183700
+    },
+    {
+      "epoch": 0.027350007535206156,
+      "grad_norm": 1.605068325996399,
+      "learning_rate": 2.459086216369129e-05,
+      "loss": 1.7439,
+      "step": 183800
+    },
+    {
+      "epoch": 0.027629089244749078,
+      "grad_norm": 1.6601132154464722,
+      "learning_rate": 2.4568724810478325e-05,
+      "loss": 1.7439,
+      "step": 183900
+    },
+    {
+      "epoch": 0.027908170954292,
+      "grad_norm": 1.546660304069519,
+      "learning_rate": 2.4546587795523733e-05,
+      "loss": 1.7339,
+      "step": 184000
+    },
+    {
+      "epoch": 0.027908170954292,
+      "eval_loss": 2.1373305320739746,
+      "eval_runtime": 51.7742,
+      "eval_samples_per_second": 196.893,
+      "eval_steps_per_second": 1.545,
+      "step": 184000
+    },
+    {
+      "epoch": 0.00027908170954291995,
+      "grad_norm": 1.6656001806259155,
+      "learning_rate": 2.4524451136190048e-05,
+      "loss": 1.8435,
+      "step": 184100
+    },
+    {
+      "epoch": 0.0005581634190858399,
+      "grad_norm": 1.6392732858657837,
+      "learning_rate": 2.4502314849839546e-05,
+      "loss": 1.8453,
+      "step": 184200
+    },
+    {
+      "epoch": 0.0008372451286287599,
+      "grad_norm": 1.7409366369247437,
+      "learning_rate": 2.4480178953834162e-05,
+      "loss": 1.8407,
+      "step": 184300
+    },
+    {
+      "epoch": 0.0011163268381716798,
+      "grad_norm": 1.5873730182647705,
+      "learning_rate": 2.445804346553557e-05,
+      "loss": 1.8428,
+      "step": 184400
+    },
+    {
+      "epoch": 0.0013954085477146,
+      "grad_norm": 1.5073753595352173,
+      "learning_rate": 2.4435908402305108e-05,
+      "loss": 1.8379,
+      "step": 184500
+    },
+    {
+      "epoch": 0.0016744902572575198,
+      "grad_norm": 2.3680567741394043,
+      "learning_rate": 2.4413773781503788e-05,
+      "loss": 1.83,
+      "step": 184600
+    },
+    {
+      "epoch": 0.00195357196680044,
+      "grad_norm": 1.6823689937591553,
+      "learning_rate": 2.4391639620492243e-05,
+      "loss": 1.8411,
+      "step": 184700
+    },
+    {
+      "epoch": 0.0022326536763433596,
+      "grad_norm": 1.5574064254760742,
+      "learning_rate": 2.4369505936630786e-05,
+      "loss": 1.8351,
+      "step": 184800
+    },
+    {
+      "epoch": 0.0025117353858862797,
+      "grad_norm": 2.146876096725464,
+      "learning_rate": 2.4347372747279337e-05,
+      "loss": 1.833,
+      "step": 184900
+    },
+    {
+      "epoch": 0.0027908170954292,
+      "grad_norm": 1.6746612787246704,
+      "learning_rate": 2.4325240069797438e-05,
+      "loss": 1.8284,
+      "step": 185000
+    },
+    {
+      "epoch": 0.0027908170954292,
+      "eval_loss": 2.133864641189575,
+      "eval_runtime": 52.0009,
+      "eval_samples_per_second": 196.035,
+      "eval_steps_per_second": 1.538,
+      "step": 185000
+    },
+    {
+      "epoch": 0.00306989880497212,
+      "grad_norm": 1.6454411745071411,
+      "learning_rate": 2.430310792154422e-05,
+      "loss": 1.8312,
+      "step": 185100
+    },
+    {
+      "epoch": 0.0033489805145150396,
+      "grad_norm": 1.8907885551452637,
+      "learning_rate": 2.4280976319878392e-05,
+      "loss": 1.8384,
+      "step": 185200
+    },
+    {
+      "epoch": 0.0036280622240579597,
+      "grad_norm": 1.6488444805145264,
+      "learning_rate": 2.425884528215825e-05,
+      "loss": 1.8241,
+      "step": 185300
+    },
+    {
+      "epoch": 0.00390714393360088,
+      "grad_norm": 1.6460552215576172,
+      "learning_rate": 2.423671482574164e-05,
+      "loss": 1.8318,
+      "step": 185400
+    },
+    {
+      "epoch": 0.0041862256431437995,
+      "grad_norm": 1.6229537725448608,
+      "learning_rate": 2.4214584967985962e-05,
+      "loss": 1.8349,
+      "step": 185500
+    },
+    {
+      "epoch": 0.004465307352686719,
+      "grad_norm": 1.5805400609970093,
+      "learning_rate": 2.419245572624812e-05,
+      "loss": 1.823,
+      "step": 185600
+    },
+    {
+      "epoch": 0.00474438906222964,
+      "grad_norm": 1.8274881839752197,
+      "learning_rate": 2.4170327117884562e-05,
+      "loss": 1.8363,
+      "step": 185700
+    },
+    {
+      "epoch": 0.005023470771772559,
+      "grad_norm": 1.5922763347625732,
+      "learning_rate": 2.4148199160251238e-05,
+      "loss": 1.8272,
+      "step": 185800
+    },
+    {
+      "epoch": 0.00530255248131548,
+      "grad_norm": 1.6500530242919922,
+      "learning_rate": 2.4126071870703574e-05,
+      "loss": 1.821,
+      "step": 185900
+    },
+    {
+      "epoch": 0.0055816341908584,
+      "grad_norm": 1.6244685649871826,
+      "learning_rate": 2.410394526659647e-05,
+      "loss": 1.8287,
+      "step": 186000
+    },
+    {
+      "epoch": 0.0055816341908584,
+      "eval_loss": 2.131998300552368,
+      "eval_runtime": 51.5187,
+      "eval_samples_per_second": 197.87,
+      "eval_steps_per_second": 1.553,
+      "step": 186000
     }
   ],
   "logging_steps": 100,
⋮
       "attributes": {}
     }
   },
+  "total_flos": 1.6232668270166016e+19,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null
    	
training_args.bin
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:…
+oid sha256:6be0aaef9589a43e4cde380bc3e83ccd55ea3b262dc3f11f0bbc4b35fc934376
 size 5777
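Each pointer file in this commit follows the Git LFS pointer layout (version line, oid sha256:<hex>, size <bytes>), so an artifact fetched out of band can be checked against its pointer. A minimal sketch using the new training_args.bin oid and size from the diff above:

import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a local file against a Git LFS pointer's sha256 oid and size."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks so large checkpoints don't load into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the training_args.bin pointer in this commit.
print(verify_lfs_pointer(
    "training_args.bin",
    "6be0aaef9589a43e4cde380bc3e83ccd55ea3b262dc3f11f0bbc4b35fc934376",
    5777,
))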