emisilab committed on
Commit
b468de9
·
verified ·
1 Parent(s): d4e913d

End of training

Browse files
README.md CHANGED
@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 5
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
@@ -49,7 +49,7 @@ The following hyperparameters were used during training:
49
 
50
  ### Framework versions
51
 
52
- - Transformers 4.39.0
53
  - Pytorch 2.2.1+cu121
54
  - Datasets 2.18.0
55
  - Tokenizers 0.15.2
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 20
44
  - mixed_precision_training: Native AMP
45
 
46
  ### Training results
 
49
 
50
  ### Framework versions
51
 
52
+ - Transformers 4.39.1
53
  - Pytorch 2.2.1+cu121
54
  - Datasets 2.18.0
55
  - Tokenizers 0.15.2
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
6
  "pad_token_id": 1,
7
- "transformers_version": "4.39.0"
8
  }
 
4
  "eos_token_id": 2,
5
  "forced_eos_token_id": 2,
6
  "pad_token_id": 1,
7
+ "transformers_version": "4.39.1"
8
  }
preprocessor_config.json CHANGED
@@ -37,8 +37,8 @@
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
- "size": {
41
- "height": 960,
42
- "width": 720
43
- }
44
  }
 
37
  "processor_class": "DonutProcessor",
38
  "resample": 2,
39
  "rescale_factor": 0.00392156862745098,
40
+ "size": [
41
+ 720,
42
+ 960
43
+ ]
44
  }
special_tokens_map.json CHANGED
@@ -1,45 +1,285 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_KTP_TGL_LAHIR>",
4
- "</s_KTP_TGL_LAHIR>",
5
- "<s_KTP_TEMPAT_LAHIR>",
6
- "</s_KTP_TEMPAT_LAHIR>",
7
- "<s_KTP_STATUS_PERKAWINAN>",
8
- "</s_KTP_STATUS_PERKAWINAN>",
9
- "<s_KTP_RT_RW>",
10
- "</s_KTP_RT_RW>",
11
- "<s_KTP_PROVINSI>",
12
- "</s_KTP_PROVINSI>",
13
- "<s_KTP_PEKERJAAN>",
14
- "</s_KTP_PEKERJAAN>",
15
- "<s_KTP_NIK>",
16
- "</s_KTP_NIK>",
17
- "<s_KTP_NAMA>",
18
- "</s_KTP_NAMA>",
19
- "<s_KTP_KEWARGANEGARAAN>",
20
- "</s_KTP_KEWARGANEGARAAN>",
21
- "<s_KTP_KELURAHAN>",
22
- "</s_KTP_KELURAHAN>",
23
- "<s_KTP_KECAMATAN>",
24
- "</s_KTP_KECAMATAN>",
25
- "<s_KTP_KABUPATEN_MADYA>",
26
- "</s_KTP_KABUPATEN_MADYA>",
27
- "<s_KTP_JENIS_KELAMIN>",
28
- "</s_KTP_JENIS_KELAMIN>",
29
- "<s_KTP_GOL_DARAH>",
30
- "</s_KTP_GOL_DARAH>",
31
- "<s_KTP_DIKELUARKAN_TGL>",
32
- "</s_KTP_DIKELUARKAN_TGL>",
33
- "<s_KTP_DIKELUARKAN_DI>",
34
- "</s_KTP_DIKELUARKAN_DI>",
35
- "<s_KTP_BERLAKU_HINGGA>",
36
- "</s_KTP_BERLAKU_HINGGA>",
37
- "<s_KTP_ALAMAT>",
38
- "</s_KTP_ALAMAT>",
39
- "<s_KTP_AGAMA>",
40
- "</s_KTP_AGAMA>",
41
- "<s>",
42
- "</s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  ],
44
  "bos_token": {
45
  "content": "<s>",
 
1
  {
2
  "additional_special_tokens": [
3
+ {
4
+ "content": "<s_KTP_TGL_LAHIR>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s_KTP_TGL_LAHIR>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
+ {
18
+ "content": "<s_KTP_TEMPAT_LAHIR>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ {
25
+ "content": "</s_KTP_TEMPAT_LAHIR>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s_KTP_STATUS_PERKAWINAN>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s_KTP_STATUS_PERKAWINAN>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "<s_KTP_RT_RW>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s_KTP_RT_RW>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s_KTP_PROVINSI>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s_KTP_PROVINSI>",
68
+ "lstrip": false,
69
+ "normalized": false,
70
+ "rstrip": false,
71
+ "single_word": false
72
+ },
73
+ {
74
+ "content": "<s_KTP_PEKERJAAN>",
75
+ "lstrip": false,
76
+ "normalized": false,
77
+ "rstrip": false,
78
+ "single_word": false
79
+ },
80
+ {
81
+ "content": "</s_KTP_PEKERJAAN>",
82
+ "lstrip": false,
83
+ "normalized": false,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ },
87
+ {
88
+ "content": "<s_KTP_NIK>",
89
+ "lstrip": false,
90
+ "normalized": false,
91
+ "rstrip": false,
92
+ "single_word": false
93
+ },
94
+ {
95
+ "content": "</s_KTP_NIK>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false
100
+ },
101
+ {
102
+ "content": "<s_KTP_NAMA>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false
107
+ },
108
+ {
109
+ "content": "</s_KTP_NAMA>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false
114
+ },
115
+ {
116
+ "content": "<s_KTP_KEWARGANEGARAAN>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false
121
+ },
122
+ {
123
+ "content": "</s_KTP_KEWARGANEGARAAN>",
124
+ "lstrip": false,
125
+ "normalized": false,
126
+ "rstrip": false,
127
+ "single_word": false
128
+ },
129
+ {
130
+ "content": "<s_KTP_KELURAHAN>",
131
+ "lstrip": false,
132
+ "normalized": false,
133
+ "rstrip": false,
134
+ "single_word": false
135
+ },
136
+ {
137
+ "content": "</s_KTP_KELURAHAN>",
138
+ "lstrip": false,
139
+ "normalized": false,
140
+ "rstrip": false,
141
+ "single_word": false
142
+ },
143
+ {
144
+ "content": "<s_KTP_KECAMATAN>",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "</s_KTP_KECAMATAN>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<s_KTP_KABUPATEN_MADYA>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "</s_KTP_KABUPATEN_MADYA>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<s_KTP_JENIS_KELAMIN>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "</s_KTP_JENIS_KELAMIN>",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "<s_KTP_GOL_DARAH>",
187
+ "lstrip": false,
188
+ "normalized": false,
189
+ "rstrip": false,
190
+ "single_word": false
191
+ },
192
+ {
193
+ "content": "</s_KTP_GOL_DARAH>",
194
+ "lstrip": false,
195
+ "normalized": false,
196
+ "rstrip": false,
197
+ "single_word": false
198
+ },
199
+ {
200
+ "content": "<s_KTP_DIKELUARKAN_TGL>",
201
+ "lstrip": false,
202
+ "normalized": false,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "</s_KTP_DIKELUARKAN_TGL>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "<s_KTP_DIKELUARKAN_DI>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "</s_KTP_DIKELUARKAN_DI>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "<s_KTP_BERLAKU_HINGGA>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "</s_KTP_BERLAKU_HINGGA>",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "<s_KTP_ALAMAT>",
243
+ "lstrip": false,
244
+ "normalized": false,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "</s_KTP_ALAMAT>",
250
+ "lstrip": false,
251
+ "normalized": false,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "<s_KTP_AGAMA>",
257
+ "lstrip": false,
258
+ "normalized": false,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "</s_KTP_AGAMA>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "<s>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "</s>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ }
283
  ],
284
  "bos_token": {
285
  "content": "<s>",
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -416,18 +416,11 @@
416
  "cls_token": "<s>",
417
  "eos_token": "</s>",
418
  "mask_token": "<mask>",
419
- "max_length": 512,
420
  "model_max_length": 1000000000000000019884624838656,
421
- "pad_to_multiple_of": null,
422
  "pad_token": "<pad>",
423
- "pad_token_type_id": 0,
424
- "padding_side": "right",
425
  "processor_class": "DonutProcessor",
426
  "sep_token": "</s>",
427
  "sp_model_kwargs": {},
428
- "stride": 0,
429
  "tokenizer_class": "XLMRobertaTokenizer",
430
- "truncation_side": "right",
431
- "truncation_strategy": "longest_first",
432
  "unk_token": "<unk>"
433
  }
 
416
  "cls_token": "<s>",
417
  "eos_token": "</s>",
418
  "mask_token": "<mask>",
 
419
  "model_max_length": 1000000000000000019884624838656,
 
420
  "pad_token": "<pad>",
 
 
421
  "processor_class": "DonutProcessor",
422
  "sep_token": "</s>",
423
  "sp_model_kwargs": {},
 
424
  "tokenizer_class": "XLMRobertaTokenizer",
 
 
425
  "unk_token": "<unk>"
426
  }