Upload tokenizer.json with huggingface_hub
Browse files- tokenizer.json +63 -4
    	
        tokenizer.json
    CHANGED
    
    | @@ -2329,10 +2329,69 @@ | |
| 2329 | 
             
                ]
         | 
| 2330 | 
             
              },
         | 
| 2331 | 
             
              "post_processor": {
         | 
| 2332 | 
            -
                "type": " | 
| 2333 | 
            -
                " | 
| 2334 | 
            -
             | 
| 2335 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 2336 | 
             
              },
         | 
| 2337 | 
             
              "decoder": {
         | 
| 2338 | 
             
                "type": "ByteLevel",
         | 
|  | |
| 2329 | 
             
                ]
         | 
| 2330 | 
             
              },
         | 
| 2331 | 
             
              "post_processor": {
         | 
| 2332 | 
            +
                "type": "Sequence",
         | 
| 2333 | 
            +
                "processors": [
         | 
| 2334 | 
            +
                  {
         | 
| 2335 | 
            +
                    "type": "ByteLevel",
         | 
| 2336 | 
            +
                    "add_prefix_space": true,
         | 
| 2337 | 
            +
                    "trim_offsets": false,
         | 
| 2338 | 
            +
                    "use_regex": true
         | 
| 2339 | 
            +
                  },
         | 
| 2340 | 
            +
                  {
         | 
| 2341 | 
            +
                    "type": "TemplateProcessing",
         | 
| 2342 | 
            +
                    "single": [
         | 
| 2343 | 
            +
                      {
         | 
| 2344 | 
            +
                        "SpecialToken": {
         | 
| 2345 | 
            +
                          "id": "<|begin_of_text|>",
         | 
| 2346 | 
            +
                          "type_id": 0
         | 
| 2347 | 
            +
                        }
         | 
| 2348 | 
            +
                      },
         | 
| 2349 | 
            +
                      {
         | 
| 2350 | 
            +
                        "Sequence": {
         | 
| 2351 | 
            +
                          "id": "A",
         | 
| 2352 | 
            +
                          "type_id": 0
         | 
| 2353 | 
            +
                        }
         | 
| 2354 | 
            +
                      }
         | 
| 2355 | 
            +
                    ],
         | 
| 2356 | 
            +
                    "pair": [
         | 
| 2357 | 
            +
                      {
         | 
| 2358 | 
            +
                        "SpecialToken": {
         | 
| 2359 | 
            +
                          "id": "<|begin_of_text|>",
         | 
| 2360 | 
            +
                          "type_id": 0
         | 
| 2361 | 
            +
                        }
         | 
| 2362 | 
            +
                      },
         | 
| 2363 | 
            +
                      {
         | 
| 2364 | 
            +
                        "Sequence": {
         | 
| 2365 | 
            +
                          "id": "A",
         | 
| 2366 | 
            +
                          "type_id": 0
         | 
| 2367 | 
            +
                        }
         | 
| 2368 | 
            +
                      },
         | 
| 2369 | 
            +
                      {
         | 
| 2370 | 
            +
                        "SpecialToken": {
         | 
| 2371 | 
            +
                          "id": "<|begin_of_text|>",
         | 
| 2372 | 
            +
                          "type_id": 1
         | 
| 2373 | 
            +
                        }
         | 
| 2374 | 
            +
                      },
         | 
| 2375 | 
            +
                      {
         | 
| 2376 | 
            +
                        "Sequence": {
         | 
| 2377 | 
            +
                          "id": "B",
         | 
| 2378 | 
            +
                          "type_id": 1
         | 
| 2379 | 
            +
                        }
         | 
| 2380 | 
            +
                      }
         | 
| 2381 | 
            +
                    ],
         | 
| 2382 | 
            +
                    "special_tokens": {
         | 
| 2383 | 
            +
                      "<|begin_of_text|>": {
         | 
| 2384 | 
            +
                        "id": "<|begin_of_text|>",
         | 
| 2385 | 
            +
                        "ids": [
         | 
| 2386 | 
            +
                          128000
         | 
| 2387 | 
            +
                        ],
         | 
| 2388 | 
            +
                        "tokens": [
         | 
| 2389 | 
            +
                          "<|begin_of_text|>"
         | 
| 2390 | 
            +
                        ]
         | 
| 2391 | 
            +
                      }
         | 
| 2392 | 
            +
                    }
         | 
| 2393 | 
            +
                  }
         | 
| 2394 | 
            +
                ]
         | 
| 2395 | 
             
              },
         | 
| 2396 | 
             
              "decoder": {
         | 
| 2397 | 
             
                "type": "ByteLevel",
         | 
