nepp1d0 committed on
Commit
b2c7891
·
1 Parent(s): d2e0379

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +21 -9
  2. vocab.txt +5 -5
tokenizer.json CHANGED
@@ -1,7 +1,19 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -186,17 +198,17 @@
186
  "r": 56,
187
  "s": 57,
188
  "##\"": 58,
189
- "##N": 59,
190
- "##F": 60,
191
- "##[": 61,
192
  "##S": 62,
193
  "##E": 63,
194
  "##P": 64,
195
  "##]": 65,
196
- "##O": 66,
197
- "##c": 67,
198
- "##C": 68,
199
- "##B": 69
200
  }
201
  }
202
  }
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 1000,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 0,
14
+ "pad_type_id": 0,
15
+ "pad_token": "[PAD]"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 0,
 
198
  "r": 56,
199
  "s": 57,
200
  "##\"": 58,
201
+ "##[": 59,
202
+ "##C": 60,
203
+ "##O": 61,
204
  "##S": 62,
205
  "##E": 63,
206
  "##P": 64,
207
  "##]": 65,
208
+ "##B": 66,
209
+ "##F": 67,
210
+ "##N": 68,
211
+ "##c": 69
212
  }
213
  }
214
  }
vocab.txt CHANGED
@@ -57,14 +57,14 @@ o
57
  r
58
  s
59
  ##"
60
- ##N
61
- ##F
62
  ##[
 
 
63
  ##S
64
  ##E
65
  ##P
66
  ##]
67
- ##O
68
- ##c
69
- ##C
70
  ##B
 
 
 
 
57
  r
58
  s
59
  ##"
 
 
60
  ##[
61
+ ##C
62
+ ##O
63
  ##S
64
  ##E
65
  ##P
66
  ##]
 
 
 
67
  ##B
68
+ ##F
69
+ ##N
70
+ ##c