emisilab committed on
Commit
992fed4
·
verified ·
1 Parent(s): 97b3126

End of training

Browse files
README.md CHANGED
@@ -43,6 +43,10 @@ The following hyperparameters were used during training:
43
  - num_epochs: 5
44
  - mixed_precision_training: Native AMP
45
 
 
 
 
 
46
  ### Framework versions
47
 
48
  - Transformers 4.39.0
 
43
  - num_epochs: 5
44
  - mixed_precision_training: Native AMP
45
 
46
+ ### Training results
47
+
48
+
49
+
50
  ### Framework versions
51
 
52
  - Transformers 4.39.0
preprocessor_config.json CHANGED
@@ -37,8 +37,8 @@
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
- "size": [
41
- 720,
42
- 960
43
- ]
44
  }
 
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
+ "size": {
41
+ "height": 960,
42
+ "width": 720
43
+ }
44
  }
special_tokens_map.json CHANGED
@@ -1,285 +1,45 @@
1
  {
2
  "additional_special_tokens": [
3
- {
4
- "content": "<s_KTP_TGL_LAHIR>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</s_KTP_TGL_LAHIR>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<s_KTP_TEMPAT_LAHIR>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</s_KTP_TEMPAT_LAHIR>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<s_KTP_STATUS_PERKAWINAN>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</s_KTP_STATUS_PERKAWINAN>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<s_KTP_RT_RW>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "</s_KTP_RT_RW>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<s_KTP_PROVINSI>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "</s_KTP_PROVINSI>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<s_KTP_PEKERJAAN>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "</s_KTP_PEKERJAAN>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- },
87
- {
88
- "content": "<s_KTP_NIK>",
89
- "lstrip": false,
90
- "normalized": false,
91
- "rstrip": false,
92
- "single_word": false
93
- },
94
- {
95
- "content": "</s_KTP_NIK>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false
100
- },
101
- {
102
- "content": "<s_KTP_NAMA>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false
107
- },
108
- {
109
- "content": "</s_KTP_NAMA>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false
114
- },
115
- {
116
- "content": "<s_KTP_KEWARGANEGARAAN>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false
121
- },
122
- {
123
- "content": "</s_KTP_KEWARGANEGARAAN>",
124
- "lstrip": false,
125
- "normalized": false,
126
- "rstrip": false,
127
- "single_word": false
128
- },
129
- {
130
- "content": "<s_KTP_KELURAHAN>",
131
- "lstrip": false,
132
- "normalized": false,
133
- "rstrip": false,
134
- "single_word": false
135
- },
136
- {
137
- "content": "</s_KTP_KELURAHAN>",
138
- "lstrip": false,
139
- "normalized": false,
140
- "rstrip": false,
141
- "single_word": false
142
- },
143
- {
144
- "content": "<s_KTP_KECAMATAN>",
145
- "lstrip": false,
146
- "normalized": false,
147
- "rstrip": false,
148
- "single_word": false
149
- },
150
- {
151
- "content": "</s_KTP_KECAMATAN>",
152
- "lstrip": false,
153
- "normalized": false,
154
- "rstrip": false,
155
- "single_word": false
156
- },
157
- {
158
- "content": "<s_KTP_KABUPATEN_MADYA>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false
163
- },
164
- {
165
- "content": "</s_KTP_KABUPATEN_MADYA>",
166
- "lstrip": false,
167
- "normalized": false,
168
- "rstrip": false,
169
- "single_word": false
170
- },
171
- {
172
- "content": "<s_KTP_JENIS_KELAMIN>",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false
177
- },
178
- {
179
- "content": "</s_KTP_JENIS_KELAMIN>",
180
- "lstrip": false,
181
- "normalized": false,
182
- "rstrip": false,
183
- "single_word": false
184
- },
185
- {
186
- "content": "<s_KTP_GOL_DARAH>",
187
- "lstrip": false,
188
- "normalized": false,
189
- "rstrip": false,
190
- "single_word": false
191
- },
192
- {
193
- "content": "</s_KTP_GOL_DARAH>",
194
- "lstrip": false,
195
- "normalized": false,
196
- "rstrip": false,
197
- "single_word": false
198
- },
199
- {
200
- "content": "<s_KTP_DIKELUARKAN_TGL>",
201
- "lstrip": false,
202
- "normalized": false,
203
- "rstrip": false,
204
- "single_word": false
205
- },
206
- {
207
- "content": "</s_KTP_DIKELUARKAN_TGL>",
208
- "lstrip": false,
209
- "normalized": false,
210
- "rstrip": false,
211
- "single_word": false
212
- },
213
- {
214
- "content": "<s_KTP_DIKELUARKAN_DI>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false
219
- },
220
- {
221
- "content": "</s_KTP_DIKELUARKAN_DI>",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false
226
- },
227
- {
228
- "content": "<s_KTP_BERLAKU_HINGGA>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false
233
- },
234
- {
235
- "content": "</s_KTP_BERLAKU_HINGGA>",
236
- "lstrip": false,
237
- "normalized": false,
238
- "rstrip": false,
239
- "single_word": false
240
- },
241
- {
242
- "content": "<s_KTP_ALAMAT>",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false
247
- },
248
- {
249
- "content": "</s_KTP_ALAMAT>",
250
- "lstrip": false,
251
- "normalized": false,
252
- "rstrip": false,
253
- "single_word": false
254
- },
255
- {
256
- "content": "<s_KTP_AGAMA>",
257
- "lstrip": false,
258
- "normalized": false,
259
- "rstrip": false,
260
- "single_word": false
261
- },
262
- {
263
- "content": "</s_KTP_AGAMA>",
264
- "lstrip": false,
265
- "normalized": false,
266
- "rstrip": false,
267
- "single_word": false
268
- },
269
- {
270
- "content": "<s>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false
275
- },
276
- {
277
- "content": "</s>",
278
- "lstrip": false,
279
- "normalized": false,
280
- "rstrip": false,
281
- "single_word": false
282
- }
283
  ],
284
  "bos_token": {
285
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s_KTP_TGL_LAHIR>",
4
+ "</s_KTP_TGL_LAHIR>",
5
+ "<s_KTP_TEMPAT_LAHIR>",
6
+ "</s_KTP_TEMPAT_LAHIR>",
7
+ "<s_KTP_STATUS_PERKAWINAN>",
8
+ "</s_KTP_STATUS_PERKAWINAN>",
9
+ "<s_KTP_RT_RW>",
10
+ "</s_KTP_RT_RW>",
11
+ "<s_KTP_PROVINSI>",
12
+ "</s_KTP_PROVINSI>",
13
+ "<s_KTP_PEKERJAAN>",
14
+ "</s_KTP_PEKERJAAN>",
15
+ "<s_KTP_NIK>",
16
+ "</s_KTP_NIK>",
17
+ "<s_KTP_NAMA>",
18
+ "</s_KTP_NAMA>",
19
+ "<s_KTP_KEWARGANEGARAAN>",
20
+ "</s_KTP_KEWARGANEGARAAN>",
21
+ "<s_KTP_KELURAHAN>",
22
+ "</s_KTP_KELURAHAN>",
23
+ "<s_KTP_KECAMATAN>",
24
+ "</s_KTP_KECAMATAN>",
25
+ "<s_KTP_KABUPATEN_MADYA>",
26
+ "</s_KTP_KABUPATEN_MADYA>",
27
+ "<s_KTP_JENIS_KELAMIN>",
28
+ "</s_KTP_JENIS_KELAMIN>",
29
+ "<s_KTP_GOL_DARAH>",
30
+ "</s_KTP_GOL_DARAH>",
31
+ "<s_KTP_DIKELUARKAN_TGL>",
32
+ "</s_KTP_DIKELUARKAN_TGL>",
33
+ "<s_KTP_DIKELUARKAN_DI>",
34
+ "</s_KTP_DIKELUARKAN_DI>",
35
+ "<s_KTP_BERLAKU_HINGGA>",
36
+ "</s_KTP_BERLAKU_HINGGA>",
37
+ "<s_KTP_ALAMAT>",
38
+ "</s_KTP_ALAMAT>",
39
+ "<s_KTP_AGAMA>",
40
+ "</s_KTP_AGAMA>",
41
+ "<s>",
42
+ "</s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  ],
44
  "bos_token": {
45
  "content": "<s>",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -416,11 +416,18 @@
416
  "cls_token": "<s>",
417
  "eos_token": "</s>",
418
  "mask_token": "<mask>",
 
419
  "model_max_length": 1000000000000000019884624838656,
 
420
  "pad_token": "<pad>",
 
 
421
  "processor_class": "DonutProcessor",
422
  "sep_token": "</s>",
423
  "sp_model_kwargs": {},
 
424
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
425
  "unk_token": "<unk>"
426
  }
 
416
  "cls_token": "<s>",
417
  "eos_token": "</s>",
418
  "mask_token": "<mask>",
419
+ "max_length": 512,
420
  "model_max_length": 1000000000000000019884624838656,
421
+ "pad_to_multiple_of": null,
422
  "pad_token": "<pad>",
423
+ "pad_token_type_id": 0,
424
+ "padding_side": "right",
425
  "processor_class": "DonutProcessor",
426
  "sep_token": "</s>",
427
  "sp_model_kwargs": {},
428
+ "stride": 0,
429
  "tokenizer_class": "XLMRobertaTokenizer",
430
+ "truncation_side": "right",
431
+ "truncation_strategy": "longest_first",
432
  "unk_token": "<unk>"
433
  }