add tokenizer
Browse files- source.spm +0 -0
 - special_tokens_map.json +1 -0
 - target.spm +0 -0
 - tokenizer_config.json +1 -0
 - vocab.json +0 -0
 
    	
        source.spm
    ADDED
    
    | 
         Binary file (1.08 MB). View file 
     | 
| 
         | 
    	
        special_tokens_map.json
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
         
     | 
    	
        target.spm
    ADDED
    
    | 
         Binary file (803 kB). View file 
     | 
| 
         | 
    	
        tokenizer_config.json
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            {"source_lang": "ru", "target_lang": "en", "unk_token": "<unk>", "eos_token": "</s>", "pad_token": "<pad>", "model_max_length": 512, "sp_model_kwargs": {}, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "/home/adorkin/tmp/tatoeba_ruen/checkpoint-69990", "tokenizer_class": "MarianTokenizer"}
         
     | 
    	
        vocab.json
    ADDED
    
    | 
         The diff for this file is too large to render. 
		See raw diff 
     | 
| 
         |