zzhang1987 committed on
Commit
c6e367b
·
verified ·
1 Parent(s): ef5fd58

Model save

Browse files
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-VL-3B-Instruct
3
+ library_name: transformers
4
+ model_name: Qwen2.5-VL-3B-Instruct-Open-R1-Distill
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - grpo
9
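+ license: other  # TODO(review): replaces the placeholder "licence: license"; confirm the actual license identifier for this model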
10
+ ---
11
+
12
+ # Model Card for Qwen2.5-VL-3B-Instruct-Open-R1-Distill
13
+
14
+ This model is a fine-tuned version of [Qwen/Qwen2.5-VL-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="zzhang1987/Qwen2.5-VL-3B-Instruct-Open-R1-Distill", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/causalai/huggingface/runs/wpz01iog)
31
+
32
+
33
+ This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.15.0.dev0
38
+ - Transformers: 4.49.0.dev0
39
+ - PyTorch: 2.5.1
40
+ - Datasets: 3.2.0
41
+ - Tokenizers: 0.21.0
42
+
43
+ ## Citations
44
+
45
+ Cite GRPO as:
46
+
47
+ ```bibtex
48
+ @article{zhihong2024deepseekmath,
49
+ title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
50
+ author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Xiao Bi and Haowei Zhang and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
51
+ year = 2024,
52
+ eprint = {arXiv:2402.03300},
53
+ }
54
+
55
+ ```
56
+
57
+ Cite TRL as:
58
+
59
+ ```bibtex
60
+ @misc{vonwerra2022trl,
61
+ title = {{TRL: Transformer Reinforcement Learning}},
62
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallouédec},
63
+ year = 2020,
64
+ journal = {GitHub repository},
65
+ publisher = {GitHub},
66
+ howpublished = {\url{https://github.com/huggingface/trl}}
67
+ }
68
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 0.0,
3
+ "train_loss": 0.8263391004140047,
4
+ "train_runtime": 308929.2472,
5
+ "train_samples": 17056,
6
+ "train_samples_per_second": 0.055,
7
+ "train_steps_per_second": 0.0
8
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attn_implementation": "flash_attention_2",
3
+ "bos_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "pad_token_id": 151643,
10
+ "repetition_penalty": 1.05,
11
+ "temperature": 0.1,
12
+ "top_k": 1,
13
+ "top_p": 0.001,
14
+ "transformers_version": "4.49.0.dev0",
15
+ "use_cache": false
16
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 0.0,
3
+ "train_loss": 0.8263391004140047,
4
+ "train_runtime": 308929.2472,
5
+ "train_samples": 17056,
6
+ "train_samples_per_second": 0.055,
7
+ "train_steps_per_second": 0.0
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.99812382739212,
5
+ "eval_steps": 100,
6
+ "global_step": 133,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "completion_length": 108.8546875,
13
+ "epoch": 0.0375234521575985,
14
+ "grad_norm": 4.688192367553711,
15
+ "kl": 0.0056383728981018065,
16
+ "learning_rate": 7.1428571428571436e-06,
17
+ "loss": 0.0002,
18
+ "reward": 0.059375,
19
+ "reward_std": 0.10496962703764438,
20
+ "rewards/accuracy_reward": 0.01328125,
21
+ "rewards/format_reward": 0.03203125,
22
+ "rewards/relaxed_accuracy_reward": 0.0140625,
23
+ "step": 5
24
+ },
25
+ {
26
+ "completion_length": 86.25859375,
27
+ "epoch": 0.075046904315197,
28
+ "grad_norm": 4.734835147857666,
29
+ "kl": 0.27505035400390626,
30
+ "learning_rate": 1.4285714285714287e-05,
31
+ "loss": 0.011,
32
+ "reward": 0.79375,
33
+ "reward_std": 0.549490630812943,
34
+ "rewards/accuracy_reward": 0.093359375,
35
+ "rewards/format_reward": 0.59375,
36
+ "rewards/relaxed_accuracy_reward": 0.106640625,
37
+ "step": 10
38
+ },
39
+ {
40
+ "completion_length": 98.770703125,
41
+ "epoch": 0.1125703564727955,
42
+ "grad_norm": 1.7818490266799927,
43
+ "kl": 0.7628662109375,
44
+ "learning_rate": 1.9996515418688493e-05,
45
+ "loss": 0.0305,
46
+ "reward": 1.2640625,
47
+ "reward_std": 0.44707948826253413,
48
+ "rewards/accuracy_reward": 0.166015625,
49
+ "rewards/format_reward": 0.895703125,
50
+ "rewards/relaxed_accuracy_reward": 0.20234375,
51
+ "step": 15
52
+ },
53
+ {
54
+ "completion_length": 71.719140625,
55
+ "epoch": 0.150093808630394,
56
+ "grad_norm": 2.765423059463501,
57
+ "kl": 509.6938110351563,
58
+ "learning_rate": 1.9874809871741877e-05,
59
+ "loss": 20.421,
60
+ "reward": 1.01875,
61
+ "reward_std": 0.36167241632938385,
62
+ "rewards/accuracy_reward": 0.122265625,
63
+ "rewards/format_reward": 0.748046875,
64
+ "rewards/relaxed_accuracy_reward": 0.1484375,
65
+ "step": 20
66
+ },
67
+ {
68
+ "completion_length": 142.440625,
69
+ "epoch": 0.18761726078799248,
70
+ "grad_norm": 1.2772337198257446,
71
+ "kl": 0.4162109375,
72
+ "learning_rate": 1.9581296124106682e-05,
73
+ "loss": 0.0166,
74
+ "reward": 1.319921875,
75
+ "reward_std": 0.4763633420690894,
76
+ "rewards/accuracy_reward": 0.186328125,
77
+ "rewards/format_reward": 0.8890625,
78
+ "rewards/relaxed_accuracy_reward": 0.24453125,
79
+ "step": 25
80
+ },
81
+ {
82
+ "completion_length": 71.17890625,
83
+ "epoch": 0.225140712945591,
84
+ "grad_norm": 1.5595391988754272,
85
+ "kl": 0.44217529296875,
86
+ "learning_rate": 1.912108091398988e-05,
87
+ "loss": 0.0177,
88
+ "reward": 1.47109375,
89
+ "reward_std": 0.35589357279241085,
90
+ "rewards/accuracy_reward": 0.225,
91
+ "rewards/format_reward": 0.969140625,
92
+ "rewards/relaxed_accuracy_reward": 0.276953125,
93
+ "step": 30
94
+ },
95
+ {
96
+ "completion_length": 82.559375,
97
+ "epoch": 0.2626641651031895,
98
+ "grad_norm": 2.5247058868408203,
99
+ "kl": 7.60645751953125,
100
+ "learning_rate": 1.8502171357296144e-05,
101
+ "loss": 0.304,
102
+ "reward": 1.445703125,
103
+ "reward_std": 0.3477139575406909,
104
+ "rewards/accuracy_reward": 0.2125,
105
+ "rewards/format_reward": 0.95234375,
106
+ "rewards/relaxed_accuracy_reward": 0.280859375,
107
+ "step": 35
108
+ },
109
+ {
110
+ "completion_length": 153.034375,
111
+ "epoch": 0.300187617260788,
112
+ "grad_norm": 0.7359657287597656,
113
+ "kl": 0.399249267578125,
114
+ "learning_rate": 1.773533563475053e-05,
115
+ "loss": 0.016,
116
+ "reward": 1.30703125,
117
+ "reward_std": 0.4166026232764125,
118
+ "rewards/accuracy_reward": 0.17265625,
119
+ "rewards/format_reward": 0.919921875,
120
+ "rewards/relaxed_accuracy_reward": 0.214453125,
121
+ "step": 40
122
+ },
123
+ {
124
+ "completion_length": 75.854296875,
125
+ "epoch": 0.33771106941838647,
126
+ "grad_norm": 1.5962355136871338,
127
+ "kl": 0.36080322265625,
128
+ "learning_rate": 1.6833915640265485e-05,
129
+ "loss": 0.0144,
130
+ "reward": 1.375390625,
131
+ "reward_std": 0.3114532470703125,
132
+ "rewards/accuracy_reward": 0.178125,
133
+ "rewards/format_reward": 0.9796875,
134
+ "rewards/relaxed_accuracy_reward": 0.217578125,
135
+ "step": 45
136
+ },
137
+ {
138
+ "completion_length": 136.20546875,
139
+ "epoch": 0.37523452157598497,
140
+ "grad_norm": 0.9903507232666016,
141
+ "kl": 0.46458740234375,
142
+ "learning_rate": 1.58135948502146e-05,
143
+ "loss": 0.0186,
144
+ "reward": 1.396484375,
145
+ "reward_std": 0.36093369908630846,
146
+ "rewards/accuracy_reward": 0.205078125,
147
+ "rewards/format_reward": 0.9546875,
148
+ "rewards/relaxed_accuracy_reward": 0.23671875,
149
+ "step": 50
150
+ },
151
+ {
152
+ "completion_length": 233.16171875,
153
+ "epoch": 0.41275797373358347,
154
+ "grad_norm": 16.861698150634766,
155
+ "kl": 2.68153076171875,
156
+ "learning_rate": 1.4692125452370664e-05,
157
+ "loss": 0.1073,
158
+ "reward": 1.103125,
159
+ "reward_std": 0.5997822197154165,
160
+ "rewards/accuracy_reward": 0.1609375,
161
+ "rewards/format_reward": 0.74921875,
162
+ "rewards/relaxed_accuracy_reward": 0.19296875,
163
+ "step": 55
164
+ },
165
+ {
166
+ "completion_length": 195.894140625,
167
+ "epoch": 0.450281425891182,
168
+ "grad_norm": 1.7692217826843262,
169
+ "kl": 1.99765625,
170
+ "learning_rate": 1.348901948209167e-05,
171
+ "loss": 0.0799,
172
+ "reward": 1.21171875,
173
+ "reward_std": 0.6390306279063225,
174
+ "rewards/accuracy_reward": 0.188671875,
175
+ "rewards/format_reward": 0.782421875,
176
+ "rewards/relaxed_accuracy_reward": 0.240625,
177
+ "step": 60
178
+ },
179
+ {
180
+ "completion_length": 120.77265625,
181
+ "epoch": 0.4878048780487805,
182
+ "grad_norm": 24.82339859008789,
183
+ "kl": 2.957373046875,
184
+ "learning_rate": 1.2225209339563144e-05,
185
+ "loss": 0.1184,
186
+ "reward": 1.422265625,
187
+ "reward_std": 0.4825815072283149,
188
+ "rewards/accuracy_reward": 0.2375,
189
+ "rewards/format_reward": 0.893359375,
190
+ "rewards/relaxed_accuracy_reward": 0.29140625,
191
+ "step": 65
192
+ },
193
+ {
194
+ "completion_length": 133.466015625,
195
+ "epoch": 0.525328330206379,
196
+ "grad_norm": 10.857051849365234,
197
+ "kl": 2.94249267578125,
198
+ "learning_rate": 1.092268359463302e-05,
199
+ "loss": 0.1177,
200
+ "reward": 1.394921875,
201
+ "reward_std": 0.4191150635480881,
202
+ "rewards/accuracy_reward": 0.21953125,
203
+ "rewards/format_reward": 0.887109375,
204
+ "rewards/relaxed_accuracy_reward": 0.28828125,
205
+ "step": 70
206
+ },
207
+ {
208
+ "completion_length": 106.87109375,
209
+ "epoch": 0.5628517823639775,
210
+ "grad_norm": 4.481828689575195,
211
+ "kl": 1.958642578125,
212
+ "learning_rate": 9.604104415737309e-06,
213
+ "loss": 0.0783,
214
+ "reward": 1.502734375,
215
+ "reward_std": 0.4382351119071245,
216
+ "rewards/accuracy_reward": 0.255859375,
217
+ "rewards/format_reward": 0.918359375,
218
+ "rewards/relaxed_accuracy_reward": 0.328515625,
219
+ "step": 75
220
+ },
221
+ {
222
+ "completion_length": 54.36171875,
223
+ "epoch": 0.600375234521576,
224
+ "grad_norm": 9.72681999206543,
225
+ "kl": 1.2845703125,
226
+ "learning_rate": 8.292413279130625e-06,
227
+ "loss": 0.0514,
228
+ "reward": 1.551953125,
229
+ "reward_std": 0.2952204098924994,
230
+ "rewards/accuracy_reward": 0.25,
231
+ "rewards/format_reward": 0.973046875,
232
+ "rewards/relaxed_accuracy_reward": 0.32890625,
233
+ "step": 80
234
+ },
235
+ {
236
+ "completion_length": 61.505859375,
237
+ "epoch": 0.6378986866791745,
238
+ "grad_norm": 3.227721929550171,
239
+ "kl": 1.57088623046875,
240
+ "learning_rate": 7.010431818542298e-06,
241
+ "loss": 0.0629,
242
+ "reward": 1.4375,
243
+ "reward_std": 0.2820946782827377,
244
+ "rewards/accuracy_reward": 0.2046875,
245
+ "rewards/format_reward": 0.96640625,
246
+ "rewards/relaxed_accuracy_reward": 0.26640625,
247
+ "step": 85
248
+ },
249
+ {
250
+ "completion_length": 84.333984375,
251
+ "epoch": 0.6754221388367729,
252
+ "grad_norm": 1.625657558441162,
253
+ "kl": 1.31497802734375,
254
+ "learning_rate": 5.780464759928623e-06,
255
+ "loss": 0.0526,
256
+ "reward": 1.430078125,
257
+ "reward_std": 0.3262003194540739,
258
+ "rewards/accuracy_reward": 0.215625,
259
+ "rewards/format_reward": 0.948828125,
260
+ "rewards/relaxed_accuracy_reward": 0.265625,
261
+ "step": 90
262
+ },
263
+ {
264
+ "completion_length": 115.9625,
265
+ "epoch": 0.7129455909943715,
266
+ "grad_norm": 2.6035006046295166,
267
+ "kl": 1.67415771484375,
268
+ "learning_rate": 4.623911849714226e-06,
269
+ "loss": 0.067,
270
+ "reward": 1.365234375,
271
+ "reward_std": 0.3999515950679779,
272
+ "rewards/accuracy_reward": 0.196484375,
273
+ "rewards/format_reward": 0.930078125,
274
+ "rewards/relaxed_accuracy_reward": 0.238671875,
275
+ "step": 95
276
+ },
277
+ {
278
+ "completion_length": 138.86328125,
279
+ "epoch": 0.7504690431519699,
280
+ "grad_norm": 5.155971527099609,
281
+ "kl": 1.94195556640625,
282
+ "learning_rate": 3.560895528440844e-06,
283
+ "loss": 0.0777,
284
+ "reward": 1.417578125,
285
+ "reward_std": 0.38890475425869225,
286
+ "rewards/accuracy_reward": 0.22578125,
287
+ "rewards/format_reward": 0.91796875,
288
+ "rewards/relaxed_accuracy_reward": 0.273828125,
289
+ "step": 100
290
+ },
291
+ {
292
+ "epoch": 0.7504690431519699,
293
+ "eval_completion_length": 102.29363066825776,
294
+ "eval_kl": 1.1877522980683175,
295
+ "eval_loss": 0.04750162735581398,
296
+ "eval_reward": 2.961142601431981,
297
+ "eval_reward_std": 0.07158712283452541,
298
+ "eval_rewards/accuracy_reward": 1.0,
299
+ "eval_rewards/format_reward": 0.9611426014319809,
300
+ "eval_rewards/relaxed_accuracy_reward": 1.0,
301
+ "eval_runtime": 25687.3005,
302
+ "eval_samples_per_second": 0.261,
303
+ "eval_steps_per_second": 0.065,
304
+ "step": 100
305
+ },
306
+ {
307
+ "completion_length": 100.34609375,
308
+ "epoch": 0.7879924953095685,
309
+ "grad_norm": 1.6533355712890625,
310
+ "kl": 0.996075439453125,
311
+ "learning_rate": 2.6099108277934105e-06,
312
+ "loss": 0.0399,
313
+ "reward": 1.522265625,
314
+ "reward_std": 0.3057616015896201,
315
+ "rewards/accuracy_reward": 0.2515625,
316
+ "rewards/format_reward": 0.96875,
317
+ "rewards/relaxed_accuracy_reward": 0.301953125,
318
+ "step": 105
319
+ },
320
+ {
321
+ "completion_length": 112.546484375,
322
+ "epoch": 0.8255159474671669,
323
+ "grad_norm": 3.2183730602264404,
324
+ "kl": 1.140087890625,
325
+ "learning_rate": 1.7875035823168641e-06,
326
+ "loss": 0.0456,
327
+ "reward": 1.494140625,
328
+ "reward_std": 0.3347205650061369,
329
+ "rewards/accuracy_reward": 0.250390625,
330
+ "rewards/format_reward": 0.95234375,
331
+ "rewards/relaxed_accuracy_reward": 0.29140625,
332
+ "step": 110
333
+ },
334
+ {
335
+ "completion_length": 126.648828125,
336
+ "epoch": 0.8630393996247655,
337
+ "grad_norm": 1.3182932138442993,
338
+ "kl": 1.543743896484375,
339
+ "learning_rate": 1.1079825545001887e-06,
340
+ "loss": 0.0617,
341
+ "reward": 1.470703125,
342
+ "reward_std": 0.3658630719408393,
343
+ "rewards/accuracy_reward": 0.246875,
344
+ "rewards/format_reward": 0.93125,
345
+ "rewards/relaxed_accuracy_reward": 0.292578125,
346
+ "step": 115
347
+ },
348
+ {
349
+ "completion_length": 119.1421875,
350
+ "epoch": 0.900562851782364,
351
+ "grad_norm": 0.7645585536956787,
352
+ "kl": 1.223272705078125,
353
+ "learning_rate": 5.831704818578842e-07,
354
+ "loss": 0.0489,
355
+ "reward": 1.36796875,
356
+ "reward_std": 0.33250761572271587,
357
+ "rewards/accuracy_reward": 0.187109375,
358
+ "rewards/format_reward": 0.941796875,
359
+ "rewards/relaxed_accuracy_reward": 0.2390625,
360
+ "step": 120
361
+ },
362
+ {
363
+ "completion_length": 116.8734375,
364
+ "epoch": 0.9380863039399625,
365
+ "grad_norm": 2.7149899005889893,
366
+ "kl": 1.196661376953125,
367
+ "learning_rate": 2.2219837744959284e-07,
368
+ "loss": 0.0479,
369
+ "reward": 1.36953125,
370
+ "reward_std": 0.33634590823203325,
371
+ "rewards/accuracy_reward": 0.184765625,
372
+ "rewards/format_reward": 0.946875,
373
+ "rewards/relaxed_accuracy_reward": 0.237890625,
374
+ "step": 125
375
+ },
376
+ {
377
+ "completion_length": 111.841015625,
378
+ "epoch": 0.975609756097561,
379
+ "grad_norm": 1.0174047946929932,
380
+ "kl": 1.1353759765625,
381
+ "learning_rate": 3.134666272774034e-08,
382
+ "loss": 0.0454,
383
+ "reward": 1.441796875,
384
+ "reward_std": 0.32110827695578337,
385
+ "rewards/accuracy_reward": 0.21953125,
386
+ "rewards/format_reward": 0.951953125,
387
+ "rewards/relaxed_accuracy_reward": 0.2703125,
388
+ "step": 130
389
+ },
390
+ {
391
+ "completion_length": 112.2109375,
392
+ "epoch": 0.99812382739212,
393
+ "kl": 1.1811625162760417,
394
+ "reward": 1.55078125,
395
+ "reward_std": 0.3744206676880519,
396
+ "rewards/accuracy_reward": 0.2643229166666667,
397
+ "rewards/format_reward": 0.953125,
398
+ "rewards/relaxed_accuracy_reward": 0.3333333333333333,
399
+ "step": 133,
400
+ "total_flos": 0.0,
401
+ "train_loss": 0.8263391004140047,
402
+ "train_runtime": 308929.2472,
403
+ "train_samples_per_second": 0.055,
404
+ "train_steps_per_second": 0.0
405
+ }
406
+ ],
407
+ "logging_steps": 5,
408
+ "max_steps": 133,
409
+ "num_input_tokens_seen": 0,
410
+ "num_train_epochs": 1,
411
+ "save_steps": 30,
412
+ "stateful_callbacks": {
413
+ "TrainerControl": {
414
+ "args": {
415
+ "should_epoch_stop": false,
416
+ "should_evaluate": false,
417
+ "should_log": false,
418
+ "should_save": true,
419
+ "should_training_stop": true
420
+ },
421
+ "attributes": {}
422
+ }
423
+ },
424
+ "total_flos": 0.0,
425
+ "train_batch_size": 1,
426
+ "trial_name": null,
427
+ "trial_params": null
428
+ }