Upload folder using huggingface_hub

- args.json +7 -7
- config.json +1 -1
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
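
For reference, a commit with this message is typically produced by a call like the one below. This is a minimal sketch: the local folder path and repo id are hypothetical placeholders, not values recorded in this commit.

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./Lumina-MDDM-8B-checkpoint",  # hypothetical local directory
    repo_id="your-org/your-model-repo",         # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```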
args.json
CHANGED
@@ -1,6 +1,6 @@
 {
-    "batch_size":
-    "accum_iter":
+    "batch_size": 4,
+    "accum_iter": 4,
     "epochs": 2,
     "warmup_epochs": 0.001,
     "lr": 0.0002,
@@ -8,25 +8,25 @@
     "wd": 0.1,
     "clip_grad": 4.0,
     "init_from": null,
-    "data_config": "/mnt/petrelfs/xinyi/
+    "data_config": "/mnt/petrelfs/xinyi/synbol_code/Lumina-MDDM/configs/data.yaml",
     "cache_ann_on_disk": true,
     "length_clustering": true,
     "num_workers": 16,
     "pin_mem": true,
     "seed": 0,
-    "output_dir": "output/Lumina-MDDM-8B-Joint-bs1024-lr2e-4-randomm01_mask-pretraining-
+    "output_dir": "output/Lumina-MDDM-8B-Joint-bs1024-lr2e-4-randomm01_mask-pretraining-stage2-512-T",
     "save_interval": 1,
-    "save_iteration_interval":
+    "save_iteration_interval": 5000,
     "only_save_trainable": false,
     "ckpt_max_keep": 2,
     "auto_resume": true,
-    "resume_path": "/mnt/petrelfs/xinyi/
+    "resume_path": "/mnt/petrelfs/xinyi/synbol_code/Lumina-MDDM/output/Lumina-MDDM-8B-Joint-bs1024-lr2e-4-randomm01_mask-pretraining-stage2/epoch0-iter59999",
     "model_parallel_size": 1,
     "data_parallel": "sdp",
     "precision": "bf16",
     "grad_precision": "fp32",
     "checkpointing": false,
-    "max_seq_len":
+    "max_seq_len": 2048,
     "mask_image_logits": false,
     "dropout": 0.05,
     "z_loss_weight": 1e-05,
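
As a side note on the updated values: batch_size and accum_iter multiply into the per-rank batch per optimizer step. The sketch below is illustrative only; the world_size value is an assumption chosen so the product matches the "bs1024" tag in output_dir, and only batch_size and accum_iter come from args.json.

```python
import json

with open("args.json") as f:
    args = json.load(f)

world_size = 64  # hypothetical number of data-parallel ranks, not stored in args.json
effective_batch = args["batch_size"] * args["accum_iter"] * world_size
print(effective_batch)  # 4 * 4 * 64 = 1024
```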
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-    "_name_or_path": "/mnt/petrelfs/xinyi/
+    "_name_or_path": "/mnt/petrelfs/xinyi/synbol_code/Lumina-MDDM/output/Lumina-MDDM-8B-Joint-bs1024-lr2e-4-randomm01_mask-pretraining-stage2/epoch0-iter59999",
     "activation_type": "silu",
     "alibi": false,
     "alibi_bias_max": 8.0,
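
After this change, "_name_or_path" points at the same stage-2 checkpoint as "resume_path" in args.json above. A quick, illustrative consistency check (assuming both files sit in the working directory):

```python
import json

config = json.load(open("config.json"))
args = json.load(open("args.json"))

# Both fields should reference the epoch0-iter59999 stage-2 checkpoint.
assert config["_name_or_path"] == args["resume_path"]
```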
model-00001-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4328276ab22bead0feb684bce6fb0edf965ec53845ab7810ff5ecbc2349885a5
+size 4927587832
model-00002-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6be25d7f27214ccbeb3cdb2179e26a9cbfac05b5c739fba8a16ed9b4d371a3f6
+size 4932693832
model-00003-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34cd45fe298c104e0f21f1c6a83b95a7b98a5a91a8e8fe1254b0ee505db81955
+size 4999819560
model-00004-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb3e4dc080813b648cf501a600352fe60faa87529f7423e0a245d01f3b85976
+size 1303544192
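
The four files above are git-LFS pointers, so the oid and size fields can be used to verify a downloaded shard. A minimal sketch, assuming the first shard has already been resolved to a real local file (the path is a placeholder):

```python
import hashlib
import os

expected_oid = "4328276ab22bead0feb684bce6fb0edf965ec53845ab7810ff5ecbc2349885a5"
expected_size = 4927587832
path = "model-00001-of-00004.safetensors"  # assumed resolved file, not the LFS pointer

# Byte size must match the pointer's "size" field.
assert os.path.getsize(path) == expected_size

# SHA-256 of the file contents must match the pointer's "oid" field.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == expected_oid
```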