Ateeqq committed on
Commit
1d4d28e
·
verified ·
1 Parent(s): c3246b7

Upload 5 files

Browse files
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipForImageClassification"
4
+ ],
5
+ "id2label": {
6
+ "0": "graphically_violent",
7
+ "1": "nudity_pornography",
8
+ "2": "safe_normal"
9
+ },
10
+ "initializer_factor": 1.0,
11
+ "label2id": {
12
+ "graphically_violent": 0,
13
+ "nudity_pornography": 1,
14
+ "safe_normal": 2
15
+ },
16
+ "model_type": "siglip",
17
+ "problem_type": "single_label_classification",
18
+ "text_config": {
19
+ "attention_dropout": 0.0,
20
+ "hidden_act": "gelu_pytorch_tanh",
21
+ "hidden_size": 768,
22
+ "intermediate_size": 3072,
23
+ "layer_norm_eps": 1e-06,
24
+ "max_position_embeddings": 64,
25
+ "model_type": "siglip_text_model",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "projection_size": 768,
29
+ "torch_dtype": "float32",
30
+ "vocab_size": 256000
31
+ },
32
+ "torch_dtype": "float32",
33
+ "transformers_version": "4.51.3",
34
+ "vision_config": {
35
+ "attention_dropout": 0.0,
36
+ "hidden_act": "gelu_pytorch_tanh",
37
+ "hidden_size": 768,
38
+ "image_size": 224,
39
+ "intermediate_size": 3072,
40
+ "layer_norm_eps": 1e-06,
41
+ "model_type": "siglip_vision_model",
42
+ "num_attention_heads": 12,
43
+ "num_channels": 3,
44
+ "num_hidden_layers": 12,
45
+ "patch_size": 16,
46
+ "torch_dtype": "float32"
47
+ }
48
+ }
nsfw-epochs-results.png ADDED
nsfw-training-results.png ADDED
preprocessor_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessor",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "processor_class": "SiglipProcessor",
18
+ "resample": 2,
19
+ "rescale_factor": 0.00392156862745098,
20
+ "size": {
21
+ "height": 224,
22
+ "width": 224
23
+ }
24
+ }
trainer_state.json ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 306,
3
+ "best_metric": 0.9845288326300985,
4
+ "best_model_checkpoint": "siglip2-safety-classifier-gpu/checkpoint-306",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 306,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.09803921568627451,
14
+ "grad_norm": 17.626625061035156,
15
+ "learning_rate": 2.5e-06,
16
+ "loss": 1.1335,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.19607843137254902,
21
+ "grad_norm": 10.657720565795898,
22
+ "learning_rate": 5.2777777777777785e-06,
23
+ "loss": 0.7086,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.29411764705882354,
28
+ "grad_norm": 6.507628440856934,
29
+ "learning_rate": 8.055555555555557e-06,
30
+ "loss": 0.2973,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.39215686274509803,
35
+ "grad_norm": 19.603195190429688,
36
+ "learning_rate": 1.0833333333333334e-05,
37
+ "loss": 0.1598,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.49019607843137253,
42
+ "grad_norm": 4.1803460121154785,
43
+ "learning_rate": 1.3611111111111113e-05,
44
+ "loss": 0.1714,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.5882352941176471,
49
+ "grad_norm": 25.094463348388672,
50
+ "learning_rate": 1.638888888888889e-05,
51
+ "loss": 0.1481,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.6862745098039216,
56
+ "grad_norm": 7.0236897468566895,
57
+ "learning_rate": 1.916666666666667e-05,
58
+ "loss": 0.1073,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.7843137254901961,
63
+ "grad_norm": 11.48787784576416,
64
+ "learning_rate": 1.9781931464174458e-05,
65
+ "loss": 0.0816,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.8823529411764706,
70
+ "grad_norm": 13.502949714660645,
71
+ "learning_rate": 1.947040498442368e-05,
72
+ "loss": 0.1205,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.9803921568627451,
77
+ "grad_norm": 13.85872745513916,
78
+ "learning_rate": 1.91588785046729e-05,
79
+ "loss": 0.1086,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 1.0,
84
+ "eval_accuracy": 0.9704641350210971,
85
+ "eval_loss": 0.08171376585960388,
86
+ "eval_runtime": 483.9596,
87
+ "eval_samples_per_second": 1.469,
88
+ "eval_steps_per_second": 0.012,
89
+ "step": 102
90
+ },
91
+ {
92
+ "epoch": 1.0784313725490196,
93
+ "grad_norm": 9.396234512329102,
94
+ "learning_rate": 1.8847352024922122e-05,
95
+ "loss": 0.0546,
96
+ "step": 110
97
+ },
98
+ {
99
+ "epoch": 1.1764705882352942,
100
+ "grad_norm": 3.7309114933013916,
101
+ "learning_rate": 1.853582554517134e-05,
102
+ "loss": 0.0574,
103
+ "step": 120
104
+ },
105
+ {
106
+ "epoch": 1.2745098039215685,
107
+ "grad_norm": 17.068557739257812,
108
+ "learning_rate": 1.8224299065420564e-05,
109
+ "loss": 0.0554,
110
+ "step": 130
111
+ },
112
+ {
113
+ "epoch": 1.3725490196078431,
114
+ "grad_norm": 3.3367063999176025,
115
+ "learning_rate": 1.7912772585669783e-05,
116
+ "loss": 0.0645,
117
+ "step": 140
118
+ },
119
+ {
120
+ "epoch": 1.4705882352941178,
121
+ "grad_norm": 10.730944633483887,
122
+ "learning_rate": 1.7601246105919006e-05,
123
+ "loss": 0.0565,
124
+ "step": 150
125
+ },
126
+ {
127
+ "epoch": 1.5686274509803921,
128
+ "grad_norm": 14.649934768676758,
129
+ "learning_rate": 1.7289719626168225e-05,
130
+ "loss": 0.083,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.6666666666666665,
135
+ "grad_norm": 14.909839630126953,
136
+ "learning_rate": 1.6978193146417448e-05,
137
+ "loss": 0.0385,
138
+ "step": 170
139
+ },
140
+ {
141
+ "epoch": 1.7647058823529411,
142
+ "grad_norm": 3.265110731124878,
143
+ "learning_rate": 1.6666666666666667e-05,
144
+ "loss": 0.1152,
145
+ "step": 180
146
+ },
147
+ {
148
+ "epoch": 1.8627450980392157,
149
+ "grad_norm": 7.800755023956299,
150
+ "learning_rate": 1.635514018691589e-05,
151
+ "loss": 0.0867,
152
+ "step": 190
153
+ },
154
+ {
155
+ "epoch": 1.9607843137254903,
156
+ "grad_norm": 12.775182723999023,
157
+ "learning_rate": 1.604361370716511e-05,
158
+ "loss": 0.0415,
159
+ "step": 200
160
+ },
161
+ {
162
+ "epoch": 2.0,
163
+ "eval_accuracy": 0.9549929676511955,
164
+ "eval_loss": 0.12330988794565201,
165
+ "eval_runtime": 14.4147,
166
+ "eval_samples_per_second": 49.325,
167
+ "eval_steps_per_second": 0.416,
168
+ "step": 204
169
+ },
170
+ {
171
+ "epoch": 2.0588235294117645,
172
+ "grad_norm": 18.324607849121094,
173
+ "learning_rate": 1.5732087227414332e-05,
174
+ "loss": 0.0649,
175
+ "step": 210
176
+ },
177
+ {
178
+ "epoch": 2.156862745098039,
179
+ "grad_norm": 1.119167685508728,
180
+ "learning_rate": 1.5420560747663555e-05,
181
+ "loss": 0.0288,
182
+ "step": 220
183
+ },
184
+ {
185
+ "epoch": 2.2549019607843137,
186
+ "grad_norm": 13.742692947387695,
187
+ "learning_rate": 1.5109034267912774e-05,
188
+ "loss": 0.0569,
189
+ "step": 230
190
+ },
191
+ {
192
+ "epoch": 2.3529411764705883,
193
+ "grad_norm": 1.8996661901474,
194
+ "learning_rate": 1.4797507788161995e-05,
195
+ "loss": 0.0071,
196
+ "step": 240
197
+ },
198
+ {
199
+ "epoch": 2.450980392156863,
200
+ "grad_norm": 13.215072631835938,
201
+ "learning_rate": 1.4485981308411216e-05,
202
+ "loss": 0.0294,
203
+ "step": 250
204
+ },
205
+ {
206
+ "epoch": 2.549019607843137,
207
+ "grad_norm": 32.19113540649414,
208
+ "learning_rate": 1.4174454828660437e-05,
209
+ "loss": 0.0217,
210
+ "step": 260
211
+ },
212
+ {
213
+ "epoch": 2.6470588235294117,
214
+ "grad_norm": 7.469207286834717,
215
+ "learning_rate": 1.3862928348909658e-05,
216
+ "loss": 0.0194,
217
+ "step": 270
218
+ },
219
+ {
220
+ "epoch": 2.7450980392156863,
221
+ "grad_norm": 2.948065996170044,
222
+ "learning_rate": 1.355140186915888e-05,
223
+ "loss": 0.017,
224
+ "step": 280
225
+ },
226
+ {
227
+ "epoch": 2.843137254901961,
228
+ "grad_norm": 0.15147125720977783,
229
+ "learning_rate": 1.3239875389408102e-05,
230
+ "loss": 0.0186,
231
+ "step": 290
232
+ },
233
+ {
234
+ "epoch": 2.9411764705882355,
235
+ "grad_norm": 0.9559997320175171,
236
+ "learning_rate": 1.2928348909657323e-05,
237
+ "loss": 0.0302,
238
+ "step": 300
239
+ },
240
+ {
241
+ "epoch": 3.0,
242
+ "eval_accuracy": 0.9845288326300985,
243
+ "eval_loss": 0.05156884714961052,
244
+ "eval_runtime": 14.9504,
245
+ "eval_samples_per_second": 47.557,
246
+ "eval_steps_per_second": 0.401,
247
+ "step": 306
248
+ }
249
+ ],
250
+ "logging_steps": 10,
251
+ "max_steps": 714,
252
+ "num_input_tokens_seen": 0,
253
+ "num_train_epochs": 7,
254
+ "save_steps": 500,
255
+ "stateful_callbacks": {
256
+ "TrainerControl": {
257
+ "args": {
258
+ "should_epoch_stop": false,
259
+ "should_evaluate": false,
260
+ "should_log": false,
261
+ "should_save": true,
262
+ "should_training_stop": false
263
+ },
264
+ "attributes": {}
265
+ }
266
+ },
267
+ "total_flos": 1.6395274262481408e+18,
268
+ "train_batch_size": 64,
269
+ "trial_name": null,
270
+ "trial_params": null
271
+ }