Cdywalst committed on
Commit
53d57f4
·
verified ·
1 Parent(s): 65188f8

Training done

Browse files
Files changed (3) hide show
  1. added_tokens.json +8 -10
  2. tokenizer.json +7 -25
  3. tokenizer_config.json +7 -23
added_tokens.json CHANGED
@@ -1,18 +1,16 @@
1
  {
2
- "</s_address>": 57537,
3
- "</s_company>": 57535,
4
- "</s_date>": 57533,
5
- "</s_gt_parse>": 57529,
6
  "</s_nom>": 57526,
7
- "</s_total>": 57531,
8
- "<s_address>": 57536,
9
- "<s_company>": 57534,
10
  "<s_cord-v2>": 57527,
11
- "<s_date>": 57532,
12
- "<s_gt_parse>": 57528,
13
  "<s_iitcdip>": 57523,
14
  "<s_nom>": 57525,
15
  "<s_synthdog>": 57524,
16
- "<s_total>": 57530,
17
  "<sep/>": 57522
18
  }
 
1
  {
2
+ "</s_address>": 57535,
3
+ "</s_company>": 57533,
4
+ "</s_date>": 57531,
 
5
  "</s_nom>": 57526,
6
+ "</s_total>": 57529,
7
+ "<s_address>": 57534,
8
+ "<s_company>": 57532,
9
  "<s_cord-v2>": 57527,
10
+ "<s_date>": 57530,
 
11
  "<s_iitcdip>": 57523,
12
  "<s_nom>": 57525,
13
  "<s_synthdog>": 57524,
14
+ "<s_total>": 57528,
15
  "<sep/>": 57522
16
  }
tokenizer.json CHANGED
@@ -104,24 +104,6 @@
104
  },
105
  {
106
  "id": 57528,
107
- "content": "<s_gt_parse>",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": true,
112
- "special": false
113
- },
114
- {
115
- "id": 57529,
116
- "content": "</s_gt_parse>",
117
- "single_word": false,
118
- "lstrip": false,
119
- "rstrip": false,
120
- "normalized": true,
121
- "special": false
122
- },
123
- {
124
- "id": 57530,
125
  "content": "<s_total>",
126
  "single_word": false,
127
  "lstrip": false,
@@ -130,7 +112,7 @@
130
  "special": false
131
  },
132
  {
133
- "id": 57531,
134
  "content": "</s_total>",
135
  "single_word": false,
136
  "lstrip": false,
@@ -139,7 +121,7 @@
139
  "special": false
140
  },
141
  {
142
- "id": 57532,
143
  "content": "<s_date>",
144
  "single_word": false,
145
  "lstrip": false,
@@ -148,7 +130,7 @@
148
  "special": false
149
  },
150
  {
151
- "id": 57533,
152
  "content": "</s_date>",
153
  "single_word": false,
154
  "lstrip": false,
@@ -157,7 +139,7 @@
157
  "special": false
158
  },
159
  {
160
- "id": 57534,
161
  "content": "<s_company>",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +148,7 @@
166
  "special": false
167
  },
168
  {
169
- "id": 57535,
170
  "content": "</s_company>",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +157,7 @@
175
  "special": false
176
  },
177
  {
178
- "id": 57536,
179
  "content": "<s_address>",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +166,7 @@
184
  "special": false
185
  },
186
  {
187
- "id": 57537,
188
  "content": "</s_address>",
189
  "single_word": false,
190
  "lstrip": false,
 
104
  },
105
  {
106
  "id": 57528,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  "content": "<s_total>",
108
  "single_word": false,
109
  "lstrip": false,
 
112
  "special": false
113
  },
114
  {
115
+ "id": 57529,
116
  "content": "</s_total>",
117
  "single_word": false,
118
  "lstrip": false,
 
121
  "special": false
122
  },
123
  {
124
+ "id": 57530,
125
  "content": "<s_date>",
126
  "single_word": false,
127
  "lstrip": false,
 
130
  "special": false
131
  },
132
  {
133
+ "id": 57531,
134
  "content": "</s_date>",
135
  "single_word": false,
136
  "lstrip": false,
 
139
  "special": false
140
  },
141
  {
142
+ "id": 57532,
143
  "content": "<s_company>",
144
  "single_word": false,
145
  "lstrip": false,
 
148
  "special": false
149
  },
150
  {
151
+ "id": 57533,
152
  "content": "</s_company>",
153
  "single_word": false,
154
  "lstrip": false,
 
157
  "special": false
158
  },
159
  {
160
+ "id": 57534,
161
  "content": "<s_address>",
162
  "single_word": false,
163
  "lstrip": false,
 
166
  "special": false
167
  },
168
  {
169
+ "id": 57535,
170
  "content": "</s_address>",
171
  "single_word": false,
172
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -89,22 +89,6 @@
89
  "special": false
90
  },
91
  "57528": {
92
- "content": "<s_gt_parse>",
93
- "lstrip": false,
94
- "normalized": true,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": false
98
- },
99
- "57529": {
100
- "content": "</s_gt_parse>",
101
- "lstrip": false,
102
- "normalized": true,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": false
106
- },
107
- "57530": {
108
  "content": "<s_total>",
109
  "lstrip": false,
110
  "normalized": true,
@@ -112,7 +96,7 @@
112
  "single_word": false,
113
  "special": false
114
  },
115
- "57531": {
116
  "content": "</s_total>",
117
  "lstrip": false,
118
  "normalized": true,
@@ -120,7 +104,7 @@
120
  "single_word": false,
121
  "special": false
122
  },
123
- "57532": {
124
  "content": "<s_date>",
125
  "lstrip": false,
126
  "normalized": true,
@@ -128,7 +112,7 @@
128
  "single_word": false,
129
  "special": false
130
  },
131
- "57533": {
132
  "content": "</s_date>",
133
  "lstrip": false,
134
  "normalized": true,
@@ -136,7 +120,7 @@
136
  "single_word": false,
137
  "special": false
138
  },
139
- "57534": {
140
  "content": "<s_company>",
141
  "lstrip": false,
142
  "normalized": true,
@@ -144,7 +128,7 @@
144
  "single_word": false,
145
  "special": false
146
  },
147
- "57535": {
148
  "content": "</s_company>",
149
  "lstrip": false,
150
  "normalized": true,
@@ -152,7 +136,7 @@
152
  "single_word": false,
153
  "special": false
154
  },
155
- "57536": {
156
  "content": "<s_address>",
157
  "lstrip": false,
158
  "normalized": true,
@@ -160,7 +144,7 @@
160
  "single_word": false,
161
  "special": false
162
  },
163
- "57537": {
164
  "content": "</s_address>",
165
  "lstrip": false,
166
  "normalized": true,
 
89
  "special": false
90
  },
91
  "57528": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "content": "<s_total>",
93
  "lstrip": false,
94
  "normalized": true,
 
96
  "single_word": false,
97
  "special": false
98
  },
99
+ "57529": {
100
  "content": "</s_total>",
101
  "lstrip": false,
102
  "normalized": true,
 
104
  "single_word": false,
105
  "special": false
106
  },
107
+ "57530": {
108
  "content": "<s_date>",
109
  "lstrip": false,
110
  "normalized": true,
 
112
  "single_word": false,
113
  "special": false
114
  },
115
+ "57531": {
116
  "content": "</s_date>",
117
  "lstrip": false,
118
  "normalized": true,
 
120
  "single_word": false,
121
  "special": false
122
  },
123
+ "57532": {
124
  "content": "<s_company>",
125
  "lstrip": false,
126
  "normalized": true,
 
128
  "single_word": false,
129
  "special": false
130
  },
131
+ "57533": {
132
  "content": "</s_company>",
133
  "lstrip": false,
134
  "normalized": true,
 
136
  "single_word": false,
137
  "special": false
138
  },
139
+ "57534": {
140
  "content": "<s_address>",
141
  "lstrip": false,
142
  "normalized": true,
 
144
  "single_word": false,
145
  "special": false
146
  },
147
+ "57535": {
148
  "content": "</s_address>",
149
  "lstrip": false,
150
  "normalized": true,