Upload folder using huggingface_hub

Files changed:
- config.json +41 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state_0.pth +3 -0
- rng_state_1.pth +3 -0
- rng_state_2.pth +3 -0
- rng_state_3.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +24 -0
- tokenizer_config.json +22 -0
- trainer_state.json +35 -0
- training_args.bin +3 -0
- vocab.json +0 -0
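For context, a checkpoint folder like this one is typically pushed in a single commit with the `upload_folder` API of `huggingface_hub`; a minimal sketch, where the local path and repo id are placeholders rather than values recorded in this commit:

```python
# Minimal sketch: push a local checkpoint folder in one commit.
# folder_path and repo_id are placeholders, not values taken from this commit.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login`
api.upload_folder(
    folder_path="./results/checkpoint-1000",  # hypothetical local directory
    repo_id="your-username/gpt2-seq-cls",     # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```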
    	
config.json ADDED
    
@@ -0,0 +1,41 @@
+{
+  "_name_or_path": "gpt2",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPT2ForSequenceClassification"
+  ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 50256,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "pad_token_id": 50256,
+  "problem_type": "single_label_classification",
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "vocab_size": 50257
+}
    	
merges.txt ADDED
    
The diff for this file is too large to render; see the raw diff.
    	
model.safetensors ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67badfde99347aba68b639ae4023f08582b9a6fb7c9710e56b2ac5712f912b94
+size 497780432
    	
optimizer.pt ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4802901405ba585faa99de7e1bb8e3f1778bd1fd1662498efbdae95eef961581
+size 995655418
    	
rng_state_0.pth ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a7f47443371a12d35bc75667be8489956fa5e433f2cda3b43dd9eaa8c6ccbf5
+size 15024
    	
rng_state_1.pth ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07853066a8b032533a446eab221645b7dd0d420ef9b717050a0d481a56a39f65
+size 15024
    	
rng_state_2.pth ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d662493e86d65e4ce4de9657c3a7130d6844966516059bd5301d5f886b47f32
+size 15024
    	
rng_state_3.pth ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9f5fa18542c0229c1737af5d539df43358c37925ee1d4bfc827e811232145f0
+size 15024
    	
scheduler.pt ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91cd73dbc1e491d7b48c74d4be70128eb1295ec698ab128cf64db18e8ca70247
+size 1064
    	
special_tokens_map.json ADDED
    
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
    	
tokenizer_config.json ADDED
    
@@ -0,0 +1,22 @@
+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}
    	
trainer_state.json ADDED
    
@@ -0,0 +1,35 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.557544757033248,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.278772378516624,
+      "grad_norm": 7.643950939178467,
+      "learning_rate": 1.1474850809889173e-05,
+      "loss": 0.3095,
+      "step": 500
+    },
+    {
+      "epoch": 2.557544757033248,
+      "grad_norm": 4.580963611602783,
+      "learning_rate": 2.949701619778346e-06,
+      "loss": 0.1631,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 1173,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 1.6722992037888e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
    	
training_args.bin ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a0c90ca4f81aadc9867932bf1a1dca895731865f925166dddcb26d0981a97d7
+size 4920
    	
vocab.json ADDED
    
The diff for this file is too large to render; see the raw diff.
