Training in progress, step 200, checkpoint
Browse files- last-checkpoint/1_Pooling/config.json +10 -0
- last-checkpoint/README.md +0 -0
- last-checkpoint/config.json +25 -0
- last-checkpoint/config_sentence_transformers.json +14 -0
- last-checkpoint/model.safetensors +3 -0
- last-checkpoint/modules.json +20 -0
- last-checkpoint/optimizer.pt +3 -0
- last-checkpoint/rng_state.pth +3 -0
- last-checkpoint/scaler.pt +3 -0
- last-checkpoint/scheduler.pt +3 -0
- last-checkpoint/sentence_bert_config.json +4 -0
- last-checkpoint/special_tokens_map.json +37 -0
- last-checkpoint/tokenizer.json +0 -0
- last-checkpoint/tokenizer_config.json +65 -0
- last-checkpoint/trainer_state.json +62 -0
- last-checkpoint/training_args.bin +3 -0
- last-checkpoint/vocab.txt +0 -0
    	
        last-checkpoint/1_Pooling/config.json
    ADDED
    
    | @@ -0,0 +1,10 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "word_embedding_dimension": 384,
         | 
| 3 | 
            +
                "pooling_mode_cls_token": false,
         | 
| 4 | 
            +
                "pooling_mode_mean_tokens": true,
         | 
| 5 | 
            +
                "pooling_mode_max_tokens": false,
         | 
| 6 | 
            +
                "pooling_mode_mean_sqrt_len_tokens": false,
         | 
| 7 | 
            +
                "pooling_mode_weightedmean_tokens": false,
         | 
| 8 | 
            +
                "pooling_mode_lasttoken": false,
         | 
| 9 | 
            +
                "include_prompt": true
         | 
| 10 | 
            +
            }
         | 
    	
        last-checkpoint/README.md
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        last-checkpoint/config.json
    ADDED
    
    | @@ -0,0 +1,25 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "architectures": [
         | 
| 3 | 
            +
                "BertModel"
         | 
| 4 | 
            +
              ],
         | 
| 5 | 
            +
              "attention_probs_dropout_prob": 0.1,
         | 
| 6 | 
            +
              "classifier_dropout": null,
         | 
| 7 | 
            +
              "gradient_checkpointing": false,
         | 
| 8 | 
            +
              "hidden_act": "gelu",
         | 
| 9 | 
            +
              "hidden_dropout_prob": 0.1,
         | 
| 10 | 
            +
              "hidden_size": 384,
         | 
| 11 | 
            +
              "initializer_range": 0.02,
         | 
| 12 | 
            +
              "intermediate_size": 1536,
         | 
| 13 | 
            +
              "layer_norm_eps": 1e-12,
         | 
| 14 | 
            +
              "max_position_embeddings": 512,
         | 
| 15 | 
            +
              "model_type": "bert",
         | 
| 16 | 
            +
              "num_attention_heads": 12,
         | 
| 17 | 
            +
              "num_hidden_layers": 6,
         | 
| 18 | 
            +
              "pad_token_id": 0,
         | 
| 19 | 
            +
              "position_embedding_type": "absolute",
         | 
| 20 | 
            +
              "torch_dtype": "float32",
         | 
| 21 | 
            +
              "transformers_version": "4.55.2",
         | 
| 22 | 
            +
              "type_vocab_size": 2,
         | 
| 23 | 
            +
              "use_cache": true,
         | 
| 24 | 
            +
              "vocab_size": 30522
         | 
| 25 | 
            +
            }
         | 
    	
        last-checkpoint/config_sentence_transformers.json
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "__version__": {
         | 
| 3 | 
            +
                "sentence_transformers": "5.1.0",
         | 
| 4 | 
            +
                "transformers": "4.55.2",
         | 
| 5 | 
            +
                "pytorch": "2.8.0+cu126"
         | 
| 6 | 
            +
              },
         | 
| 7 | 
            +
              "model_type": "SentenceTransformer",
         | 
| 8 | 
            +
              "prompts": {
         | 
| 9 | 
            +
                "query": "",
         | 
| 10 | 
            +
                "document": ""
         | 
| 11 | 
            +
              },
         | 
| 12 | 
            +
              "default_prompt_name": null,
         | 
| 13 | 
            +
              "similarity_fn_name": "cosine"
         | 
| 14 | 
            +
            }
         | 
    	
        last-checkpoint/model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f953fb0f9519003bc3cc56f7c0e14b4c7a0cf25c6b003c8ae75b96f4ae4d170b
         | 
| 3 | 
            +
            size 90864192
         | 
    	
        last-checkpoint/modules.json
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [
         | 
| 2 | 
            +
              {
         | 
| 3 | 
            +
                "idx": 0,
         | 
| 4 | 
            +
                "name": "0",
         | 
| 5 | 
            +
                "path": "",
         | 
| 6 | 
            +
                "type": "sentence_transformers.models.Transformer"
         | 
| 7 | 
            +
              },
         | 
| 8 | 
            +
              {
         | 
| 9 | 
            +
                "idx": 1,
         | 
| 10 | 
            +
                "name": "1",
         | 
| 11 | 
            +
                "path": "1_Pooling",
         | 
| 12 | 
            +
                "type": "sentence_transformers.models.Pooling"
         | 
| 13 | 
            +
              },
         | 
| 14 | 
            +
              {
         | 
| 15 | 
            +
                "idx": 2,
         | 
| 16 | 
            +
                "name": "2",
         | 
| 17 | 
            +
                "path": "2_Normalize",
         | 
| 18 | 
            +
                "type": "sentence_transformers.models.Normalize"
         | 
| 19 | 
            +
              }
         | 
| 20 | 
            +
            ]
         | 
    	
        last-checkpoint/optimizer.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:9c1aa2b4c3b6dfcc06be400285e90b262bb36d49374d8348b40cd77fa14c0c90
         | 
| 3 | 
            +
            size 180609611
         | 
    	
        last-checkpoint/rng_state.pth
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:771b8080df542b78be2854f4e9393672f71c5efd3b22a27b765fd12c00d11b11
         | 
| 3 | 
            +
            size 14645
         | 
    	
        last-checkpoint/scaler.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:124625e167eb28acbfc793cfcb3e8a08b32e7fea06501462bc9e420a5e1beb2a
         | 
| 3 | 
            +
            size 1383
         | 
    	
        last-checkpoint/scheduler.pt
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:42b8c82da689c3defc876679e8ba8bd56df03c2bb1d400cb4fa8209aae1fd7e2
         | 
| 3 | 
            +
            size 1465
         | 
    	
        last-checkpoint/sentence_bert_config.json
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
                "max_seq_length": 256,
         | 
| 3 | 
            +
                "do_lower_case": false
         | 
| 4 | 
            +
            }
         | 
    	
        last-checkpoint/special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,37 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "cls_token": {
         | 
| 3 | 
            +
                "content": "[CLS]",
         | 
| 4 | 
            +
                "lstrip": false,
         | 
| 5 | 
            +
                "normalized": false,
         | 
| 6 | 
            +
                "rstrip": false,
         | 
| 7 | 
            +
                "single_word": false
         | 
| 8 | 
            +
              },
         | 
| 9 | 
            +
              "mask_token": {
         | 
| 10 | 
            +
                "content": "[MASK]",
         | 
| 11 | 
            +
                "lstrip": false,
         | 
| 12 | 
            +
                "normalized": false,
         | 
| 13 | 
            +
                "rstrip": false,
         | 
| 14 | 
            +
                "single_word": false
         | 
| 15 | 
            +
              },
         | 
| 16 | 
            +
              "pad_token": {
         | 
| 17 | 
            +
                "content": "[PAD]",
         | 
| 18 | 
            +
                "lstrip": false,
         | 
| 19 | 
            +
                "normalized": false,
         | 
| 20 | 
            +
                "rstrip": false,
         | 
| 21 | 
            +
                "single_word": false
         | 
| 22 | 
            +
              },
         | 
| 23 | 
            +
              "sep_token": {
         | 
| 24 | 
            +
                "content": "[SEP]",
         | 
| 25 | 
            +
                "lstrip": false,
         | 
| 26 | 
            +
                "normalized": false,
         | 
| 27 | 
            +
                "rstrip": false,
         | 
| 28 | 
            +
                "single_word": false
         | 
| 29 | 
            +
              },
         | 
| 30 | 
            +
              "unk_token": {
         | 
| 31 | 
            +
                "content": "[UNK]",
         | 
| 32 | 
            +
                "lstrip": false,
         | 
| 33 | 
            +
                "normalized": false,
         | 
| 34 | 
            +
                "rstrip": false,
         | 
| 35 | 
            +
                "single_word": false
         | 
| 36 | 
            +
              }
         | 
| 37 | 
            +
            }
         | 
    	
        last-checkpoint/tokenizer.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        last-checkpoint/tokenizer_config.json
    ADDED
    
    | @@ -0,0 +1,65 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "added_tokens_decoder": {
         | 
| 3 | 
            +
                "0": {
         | 
| 4 | 
            +
                  "content": "[PAD]",
         | 
| 5 | 
            +
                  "lstrip": false,
         | 
| 6 | 
            +
                  "normalized": false,
         | 
| 7 | 
            +
                  "rstrip": false,
         | 
| 8 | 
            +
                  "single_word": false,
         | 
| 9 | 
            +
                  "special": true
         | 
| 10 | 
            +
                },
         | 
| 11 | 
            +
                "100": {
         | 
| 12 | 
            +
                  "content": "[UNK]",
         | 
| 13 | 
            +
                  "lstrip": false,
         | 
| 14 | 
            +
                  "normalized": false,
         | 
| 15 | 
            +
                  "rstrip": false,
         | 
| 16 | 
            +
                  "single_word": false,
         | 
| 17 | 
            +
                  "special": true
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                "101": {
         | 
| 20 | 
            +
                  "content": "[CLS]",
         | 
| 21 | 
            +
                  "lstrip": false,
         | 
| 22 | 
            +
                  "normalized": false,
         | 
| 23 | 
            +
                  "rstrip": false,
         | 
| 24 | 
            +
                  "single_word": false,
         | 
| 25 | 
            +
                  "special": true
         | 
| 26 | 
            +
                },
         | 
| 27 | 
            +
                "102": {
         | 
| 28 | 
            +
                  "content": "[SEP]",
         | 
| 29 | 
            +
                  "lstrip": false,
         | 
| 30 | 
            +
                  "normalized": false,
         | 
| 31 | 
            +
                  "rstrip": false,
         | 
| 32 | 
            +
                  "single_word": false,
         | 
| 33 | 
            +
                  "special": true
         | 
| 34 | 
            +
                },
         | 
| 35 | 
            +
                "103": {
         | 
| 36 | 
            +
                  "content": "[MASK]",
         | 
| 37 | 
            +
                  "lstrip": false,
         | 
| 38 | 
            +
                  "normalized": false,
         | 
| 39 | 
            +
                  "rstrip": false,
         | 
| 40 | 
            +
                  "single_word": false,
         | 
| 41 | 
            +
                  "special": true
         | 
| 42 | 
            +
                }
         | 
| 43 | 
            +
              },
         | 
| 44 | 
            +
              "clean_up_tokenization_spaces": false,
         | 
| 45 | 
            +
              "cls_token": "[CLS]",
         | 
| 46 | 
            +
              "do_basic_tokenize": true,
         | 
| 47 | 
            +
              "do_lower_case": true,
         | 
| 48 | 
            +
              "extra_special_tokens": {},
         | 
| 49 | 
            +
              "mask_token": "[MASK]",
         | 
| 50 | 
            +
              "max_length": 128,
         | 
| 51 | 
            +
              "model_max_length": 256,
         | 
| 52 | 
            +
              "never_split": null,
         | 
| 53 | 
            +
              "pad_to_multiple_of": null,
         | 
| 54 | 
            +
              "pad_token": "[PAD]",
         | 
| 55 | 
            +
              "pad_token_type_id": 0,
         | 
| 56 | 
            +
              "padding_side": "right",
         | 
| 57 | 
            +
              "sep_token": "[SEP]",
         | 
| 58 | 
            +
              "stride": 0,
         | 
| 59 | 
            +
              "strip_accents": null,
         | 
| 60 | 
            +
              "tokenize_chinese_chars": true,
         | 
| 61 | 
            +
              "tokenizer_class": "BertTokenizer",
         | 
| 62 | 
            +
              "truncation_side": "right",
         | 
| 63 | 
            +
              "truncation_strategy": "longest_first",
         | 
| 64 | 
            +
              "unk_token": "[UNK]"
         | 
| 65 | 
            +
            }
         | 
    	
        last-checkpoint/trainer_state.json
    ADDED
    
    | @@ -0,0 +1,62 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "best_global_step": null,
         | 
| 3 | 
            +
              "best_metric": null,
         | 
| 4 | 
            +
              "best_model_checkpoint": null,
         | 
| 5 | 
            +
              "epoch": 0.003534130868866074,
         | 
| 6 | 
            +
              "eval_steps": 500,
         | 
| 7 | 
            +
              "global_step": 200,
         | 
| 8 | 
            +
              "is_hyper_param_search": false,
         | 
| 9 | 
            +
              "is_local_process_zero": true,
         | 
| 10 | 
            +
              "is_world_process_zero": true,
         | 
| 11 | 
            +
              "log_history": [
         | 
| 12 | 
            +
                {
         | 
| 13 | 
            +
                  "epoch": 0.0008835327172165185,
         | 
| 14 | 
            +
                  "grad_norm": 5.665971279144287,
         | 
| 15 | 
            +
                  "learning_rate": 4.3286219081272084e-07,
         | 
| 16 | 
            +
                  "loss": 1.3738,
         | 
| 17 | 
            +
                  "step": 50
         | 
| 18 | 
            +
                },
         | 
| 19 | 
            +
                {
         | 
| 20 | 
            +
                  "epoch": 0.001767065434433037,
         | 
| 21 | 
            +
                  "grad_norm": 5.6161651611328125,
         | 
| 22 | 
            +
                  "learning_rate": 8.745583038869259e-07,
         | 
| 23 | 
            +
                  "loss": 1.1661,
         | 
| 24 | 
            +
                  "step": 100
         | 
| 25 | 
            +
                },
         | 
| 26 | 
            +
                {
         | 
| 27 | 
            +
                  "epoch": 0.0026505981516495554,
         | 
| 28 | 
            +
                  "grad_norm": 7.866199970245361,
         | 
| 29 | 
            +
                  "learning_rate": 1.3162544169611309e-06,
         | 
| 30 | 
            +
                  "loss": 1.2107,
         | 
| 31 | 
            +
                  "step": 150
         | 
| 32 | 
            +
                },
         | 
| 33 | 
            +
                {
         | 
| 34 | 
            +
                  "epoch": 0.003534130868866074,
         | 
| 35 | 
            +
                  "grad_norm": 5.07379674911499,
         | 
| 36 | 
            +
                  "learning_rate": 1.7579505300353357e-06,
         | 
| 37 | 
            +
                  "loss": 0.9855,
         | 
| 38 | 
            +
                  "step": 200
         | 
| 39 | 
            +
                }
         | 
| 40 | 
            +
              ],
         | 
| 41 | 
            +
              "logging_steps": 50,
         | 
| 42 | 
            +
              "max_steps": 56591,
         | 
| 43 | 
            +
              "num_input_tokens_seen": 0,
         | 
| 44 | 
            +
              "num_train_epochs": 1,
         | 
| 45 | 
            +
              "save_steps": 200,
         | 
| 46 | 
            +
              "stateful_callbacks": {
         | 
| 47 | 
            +
                "TrainerControl": {
         | 
| 48 | 
            +
                  "args": {
         | 
| 49 | 
            +
                    "should_epoch_stop": false,
         | 
| 50 | 
            +
                    "should_evaluate": false,
         | 
| 51 | 
            +
                    "should_log": false,
         | 
| 52 | 
            +
                    "should_save": true,
         | 
| 53 | 
            +
                    "should_training_stop": false
         | 
| 54 | 
            +
                  },
         | 
| 55 | 
            +
                  "attributes": {}
         | 
| 56 | 
            +
                }
         | 
| 57 | 
            +
              },
         | 
| 58 | 
            +
              "total_flos": 0.0,
         | 
| 59 | 
            +
              "train_batch_size": 256,
         | 
| 60 | 
            +
              "trial_name": null,
         | 
| 61 | 
            +
              "trial_params": null
         | 
| 62 | 
            +
            }
         | 
    	
        last-checkpoint/training_args.bin
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:a5a2600b2df7b8aa86e9a368609e29900445ebc9cdf1eaf37dde9ffa01ded762
         | 
| 3 | 
            +
            size 6097
         | 
    	
        last-checkpoint/vocab.txt
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  |