Upload 5 files
Browse files
- tokenizer.json +18 -10
tokenizer.json
CHANGED
|
@@ -51,17 +51,25 @@
|
|
| 51 |
],
|
| 52 |
"normalizer": null,
|
| 53 |
"pre_tokenizer": {
|
| 54 |
-
"type": "
|
| 55 |
-
"
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
},
|
|
|
|
| 65 |
"decoder": {
|
| 66 |
"type": "ByteLevel",
|
| 67 |
"add_prefix_space": true,
|
|
|
|
| 51 |
],
|
| 52 |
"normalizer": null,
|
| 53 |
"pre_tokenizer": {
|
| 54 |
+
"type": "Sequence",
|
| 55 |
+
"pretokenizers": [
|
| 56 |
+
{
|
| 57 |
+
"type": "Split",
|
| 58 |
+
"pattern": {
|
| 59 |
+
"Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
|
| 60 |
+
},
|
| 61 |
+
"behavior": "Removed",
|
| 62 |
+
"invert": true
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"type": "ByteLevel",
|
| 66 |
+
"add_prefix_space": false,
|
| 67 |
+
"trim_offsets": true,
|
| 68 |
+
"use_regex": false
|
| 69 |
+
}
|
| 70 |
+
]
|
| 71 |
},
|
| 72 |
+
"post_processor": null,
|
| 73 |
"decoder": {
|
| 74 |
"type": "ByteLevel",
|
| 75 |
"add_prefix_space": true,
|