Commit 5121da0
Parent(s): 438adad

train folder of 20240508 commit.
- 20240418-stage1-dance800/config.yaml +103 -0
- 20240422-stage1-ubc+td1/config.yaml +106 -0
- 20240423-stage1-ubc+td10/config.yaml +106 -0
- 20240425-stage2-openpg/config.yaml +92 -0
- 20240428-stage2-6k/config.yaml +97 -0
- 20240504-stage1-51k-raw-opg/config.yaml +105 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt +3 -0
- 20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin +3 -0
- 20240508-stage1-openpg-nopaf/config.yaml +108 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/denoising_unet-68579.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/pose_guider-68579.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-65381.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-66980.pth +3 -0
- 20240508-stage1-openpg-nopaf/reference_unet-68579.pth +3 -0
- 20240510-stage1-9k/config.yaml +105 -0
- 20240513-stage2-9k/config.yaml +99 -0
20240418-stage1-dance800/config.yaml
ADDED
@@ -0,0 +1,103 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+data:
+  crop_scale:
+  - 0.6
+  - 1
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.4
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-dance800
+freeze_denoise: false
+freeze_reference: false
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  enable: false
+output_dir: /workspace/camus/train
+pose_guider_pretrain: true
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240418-stage1-dance800/
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 30000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  special_steps:
+  - 24001
+  validation_steps: 1000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 284
+    guidance_scale: 2.4
+    ref_frame: 16
+    seed: 42
+    size:
+    - 768
+    - 1152
+    steps: 20
+    videos:
+    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+  pose_image_paths:
+  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
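These YAML files read like OmegaConf configs for a two-stage AnimateAnyone-style trainer; the training script itself is not part of this commit. Below is a minimal sketch of how such a file is typically consumed, assuming OmegaConf (the attribute paths simply mirror the keys above):

# Minimal sketch, assuming an OmegaConf-based trainer (not included in this commit).
from omegaconf import OmegaConf

cfg = OmegaConf.load("20240418-stage1-dance800/config.yaml")

# Nested keys become attribute access, e.g. training resolution and learning rate:
print(cfg.data.train_width, cfg.data.train_height)  # 768 1152
print(cfg.solver.learning_rate)                     # 1e-05

# Command-line overrides are commonly merged on top of the file:
cfg = OmegaConf.merge(cfg, OmegaConf.from_cli())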
20240422-stage1-ubc+td1/config.yaml
ADDED
@@ -0,0 +1,106 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+data:
+  crop_scale:
+  - 0.8
+  - 1.2
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/meta-1per_person.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.2
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-ubc+td1
+freeze_denoise: false
+freeze_reference: false
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  enable: false
+output_dir: /workspace/camus/train
+pose_guider_pretrain: true
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240422-stage1-ubc+td1
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 30000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 2000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 284
+    guidance_scale: 2.4
+    ref_frame: 16
+    seed: 42
+    size:
+    - 768
+    - 1152
+    steps: 20
+    videos:
+    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+  pose_image_paths:
+  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-1.png
+  - configs/inference/ref_images/anyone-2.png
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
20240423-stage1-ubc+td10/config.yaml
ADDED
@@ -0,0 +1,106 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+data:
+  crop_scale:
+  - 0.8
+  - 1.2
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/meta-10per_person.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.2
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-ubc+td10
+freeze_denoise: false
+freeze_reference: false
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  enable: false
+output_dir: /workspace/camus/train
+pose_guider_pretrain: true
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240423-stage1-ubc+td10
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 30000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 2000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 284
+    guidance_scale: 2.4
+    ref_frame: 16
+    seed: 42
+    size:
+    - 768
+    - 1152
+    steps: 20
+    videos:
+    - configs/inference/metric/o4flk5RPE4D4fgNEUNFRZIbOBjCsEgB9DQQQLA.mp4
+    - configs/inference/metric/ocMJyAEDjQzjwqAuIPABAvmRyofjKiYhBExati.mp4
+    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+    - configs/inference/metric/oE2tJFpDWANbMe7cxAg3hoq0QAPNeRnCQefG8F.mp4
+    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+    - configs/inference/metric/oUGmQqeqoAAega5fGgnAICfOWJRAAFTJCgDibU.mp4
+    - configs/inference/metric/oYQJ5zLiEgG71SAAFEfAeVIEVFROW4ZGQJTfeF.mp4
+  pose_image_paths:
+  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-1.png
+  - configs/inference/ref_images/anyone-2.png
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
20240425-stage2-openpg/config.yaml
ADDED
@@ -0,0 +1,92 @@
+base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+checkpointing_steps: 2000
+data:
+  crop_scale:
+  - 1
+  - 1
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  - /workspace/develop/video/data/20240321/meta.json
+  - /workspace/develop/video/data/20240327/meta.json
+  n_sample_frames: 24
+  ref_augment:
+    pan:
+    - 0.04
+    - 0.04
+    rotate: 2
+    scale:
+    - 0.9
+    - 1.0
+  sample_rate: 4
+  train_bs: 1
+  train_height: 960
+  train_width: 640
+enable_zero_snr: true
+exp_name: stage2-openpg
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  block_out_channels:
+  - 96
+  - 192
+  enable: true
+output_dir: /workspace/camus/train
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240425-stage2-openpg
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: true
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 160000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: true
+stage1_ckpt_dir: /workspace/camus/train/20240418-stage1-openpg-c96_192
+stage1_ckpt_step: 86396
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 1000
+validation:
+  metric:
+    generate_frame_range:
+    - 50
+    - 74
+    guidance_scale: 2.4
+    ref_frame: 29
+    seed: 42
+    steps: 20
+    videos:
+    - configs/inference/metric/A1ubDo0PbQS.mp4
+    - configs/inference/metric/oEtwozJ6AoIBJ6oyK6rAAQGAOiEWIQixF2F2fB.mp4
+    - configs/inference/metric/oEOgTIKvy7lAQIfZ37E5BFmCVBID3gIQUQfMdv.mp4
+    - configs/inference/metric/ocQoBObnUgBnVskCnPe41sYRiBcAFD5f8AN1Rg.mp4
+  pose_range:
+  - 24
+  - 48
+  test_cases:
+  - - ./configs/inference/ref_images/anyone-2.png
+    - ./configs/inference/metric/91HzMhq7eOS.mp4
+  uniform_along_time: false
+weight_dtype: fp16
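The noise_scheduler_kwargs block maps directly onto a diffusers scheduler constructor. A hedged sketch, assuming DDIMScheduler and that enable_zero_snr is wired up via the usual zero-terminal-SNR flags (the actual scheduler class and wiring are this repo's choice, not visible in the commit):

# Sketch: building a diffusers scheduler from noise_scheduler_kwargs.
from diffusers import DDIMScheduler
from omegaconf import OmegaConf

cfg = OmegaConf.load("20240425-stage2-openpg/config.yaml")
kwargs = OmegaConf.to_container(cfg.noise_scheduler_kwargs)
if cfg.enable_zero_snr:
    # Assumed wiring for zero-terminal SNR (Lin et al., 2023).
    kwargs.update(rescale_betas_zero_snr=True, timestep_spacing="trailing")
scheduler = DDIMScheduler(**kwargs)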
20240428-stage2-6k/config.yaml
ADDED
@@ -0,0 +1,97 @@
+base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+checkpointing_steps: 2000
+data:
+  crop_scale:
+  - 1
+  - 1
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  - /workspace/develop/video/data/20240321/meta.json
+  - /workspace/develop/video/data/20240327/meta.json
+  n_sample_frames: 24
+  ref_augment:
+    pan:
+    - 0.04
+    - 0.04
+    rotate: 2
+    scale:
+    - 0.9
+    - 1.2
+  sample_rate: 4
+  train_bs: 1
+  train_height: 960
+  train_width: 640
+enable_zero_snr: true
+exp_name: stage2-6k
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+output_dir: /workspace/camus/train
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240428-stage2-6k
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: true
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 160000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: true
+stage1_ckpt_dir: /workspace/camus/train/20240421-stage1-6k
+stage1_ckpt_step: 78782
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 1000
+validation:
+  metric:
+    generate_frame_range:
+    - 30
+    - 54
+    guidance_scale: 2.8
+    ref_frame: 29
+    seed: 42
+    steps: 30
+    videos:
+    - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
+    - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
+    - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
+    - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
+  pose_range:
+  - 0
+  - 24
+  test_cases:
+  - - ./configs/inference/ref_images/anyone-3.png
+    - ./configs/inference/pose_videos/demo18.mp4
+  - - ./configs/inference/ref_images/anyone-3-partial.png
+    - ./configs/inference/pose_videos/demo6.mp4
+  - - ./configs/inference/ref_images/anyone-2.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-1.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-5.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-11.png
+    - ./configs/inference/pose_videos/demo15.mp4
+  uniform_along_time: false
+weight_dtype: fp16
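Both stage-2 configs sample n_sample_frames frames at stride sample_rate, so each training clip spans a 96-frame window of the source video; a quick check (the 30 fps figure is an assumption about the source clips, not stated in the config):

# Temporal window implied by the stage-2 data settings.
n_sample_frames, sample_rate, fps = 24, 4, 30  # fps assumed, not in the config
window = n_sample_frames * sample_rate          # 96 source frames spanned
print(window, round(window / fps, 1))           # 96 frames, ~3.2 s at 30 fps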
20240504-stage1-51k-raw-opg/config.yaml
ADDED
@@ -0,0 +1,105 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+data:
+  crop_scale:
+  - 0.8
+  - 1.2
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/202403raw/meta.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.2
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-51k-raw-opg
+freeze_denoise: false
+freeze_reference: false
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  block_out_channels:
+  - 96
+  - 192
+  enable: true
+  model_path: ./pretrained_weights/body_pose_model.pth
+output_dir: /workspace/camus/train
+pose_guider_pretrain: true
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240504-stage1-51k-raw-opg
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 100000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 1000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 300
+    guidance_scale: 1.9
+    ref_frame: 28
+    seed: 42
+    size:
+    - 640
+    - 960
+    steps: 20
+    videos:
+    - configs/inference/metric/91HzMhq7eOS.mp4
+    - configs/inference/metric/A1T-Ea-FlQS.mp4
+    - configs/inference/metric/A1ubDo0PbQS.mp4
+    - configs/inference/metric/A1YNmKj0sCS.mp4
+  pose_image_paths:
+  - configs/inference/ref_images/91c+SL7Cg7S-98.png
+  - configs/inference/ref_images/91c+SL7Cg7S-150.png
+  - configs/inference/ref_images/91c+SL7Cg7S-220.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-1.png
+  - configs/inference/ref_images/anyone-2.png
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4992207980f1536bd01774c79570964dc5914869eacdbf4e036e5be6d3a08009
+size 13608730321
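The .bin/.pkl/.pt/.pth entries in this commit are Git LFS pointer files: the repository stores only the three-line stub above, and the sha256 oid addresses the real payload (here a ~13.6 GB optimizer state). A small sketch of reading such a pointer, assuming the standard "key value" LFS pointer format shown:

# Sketch: parsing a Git LFS pointer file (three "key value" lines).
def parse_lfs_pointer(path):
    fields = dict(line.split(" ", 1) for line in open(path).read().splitlines())
    return fields["oid"].removeprefix("sha256:"), int(fields["size"])

oid, size = parse_lfs_pointer("20240508-stage1-openpg-nopaf/checkpoint-68000/optimizer.bin")
print(f"{size / 1e9:.1f} GB blob, sha256 {oid[:12]}...")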
20240508-stage1-openpg-nopaf/checkpoint-68000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10cba949e36100e45347b68dbb7c450f68f22ca6eddd374135e04a167d349939
+size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-68000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:006807bad9ad34cbe6cea8f2852e1958e302fc1d48af87193038df01ebdf7f54
+size 14663
20240508-stage1-openpg-nopaf/checkpoint-68000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8044a6001b7773ad8995c7d8eaca15afedbae86f4988d9021814efe220547b7
+size 557
20240508-stage1-openpg-nopaf/checkpoint-68000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:531528e64a56b148ae89500f53709845c48cc71002664c9dc655272f32520b7a
+size 563
20240508-stage1-openpg-nopaf/checkpoint-69000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bf4e2162da621736604cc2fb72622ae9bd06c5f62135da6bec546d792f89da2
+size 13608730321
20240508-stage1-openpg-nopaf/checkpoint-69000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74feaf9a2826975a96d2148c6768b6a36493b561fc973bda16d11df5d0cbc471
+size 7089007643
20240508-stage1-openpg-nopaf/checkpoint-69000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2fbfcb40a710a5f4392303e002cc3301bc6a3cb5f094e119b47fb82ff721292
+size 14727
20240508-stage1-openpg-nopaf/checkpoint-69000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d27fb664e5da2431936336dcfd332b3f059d007cac2905bf45738234c84c3618
+size 557
20240508-stage1-openpg-nopaf/checkpoint-69000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f47d91314bb33995bbc73a56304786baf645f2d50ad3fd39f0b8c360b7e8e85
+size 563
20240508-stage1-openpg-nopaf/config.yaml
ADDED
@@ -0,0 +1,108 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+data:
+  crop_scale:
+  - 0.8
+  - 1.2
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  - /workspace/develop/video/data/20240321/meta.json
+  - /workspace/develop/video/data/20240327/meta.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.2
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-openpg-nopaf
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  block_out_channels:
+  - 96
+  - 192
+  enable: true
+  exclude_paf: true
+  model_path: ./pretrained_weights/body_pose_model.pth
+output_dir: /workspace/camus/train
+pose_guider_pretrain: false
+resume_from_checkpoint: latest
+save_dir: /workspace/camus/train/20240508-stage1-openpg-nopaf
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 100000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  special_steps:
+  - 200
+  validation_steps: 1000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 300
+    guidance_scale: 2.8
+    ref_frame: 28
+    seed: 42
+    size:
+    - 640
+    - 960
+    steps: 20
+    videos:
+    - configs/inference/metric/91HzMhq7eOS.mp4
+    - configs/inference/metric/A1T-Ea-FlQS.mp4
+    - configs/inference/metric/A1ubDo0PbQS.mp4
+    - configs/inference/metric/A1YNmKj0sCS.mp4
+  pose_image_paths:
+  - configs/inference/ref_images/91c+SL7Cg7S-98.png
+  - configs/inference/ref_images/91c+SL7Cg7S-150.png
+  - configs/inference/ref_images/91c+SL7Cg7S-220.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-1.png
+  - configs/inference/ref_images/anyone-2.png
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
20240508-stage1-openpg-nopaf/denoising_unet-65381.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c863d62ce3d4338a23eb0d144e810b4a88eb9c1ce505f574bfdf5601244af887
+size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-66980.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52632f6c8100a3b591211b521af2a55238f3bc8f405e07813fba81d64fd4bee2
+size 3438374293
20240508-stage1-openpg-nopaf/denoising_unet-68579.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b981a2f1da3c8079614b855d4912b1893dc11ea45d693722c6825c2b4de0b77e
+size 3438374293
20240508-stage1-openpg-nopaf/pose_guider-65381.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f8aca9ca3f12cf87678561cc385c2a2240f6342645212fe4d1eaf6f5d63c65
+size 212263301
20240508-stage1-openpg-nopaf/pose_guider-66980.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28e32fd312ab48515f8d6c89987a60075a57400493754999fd7a54dea28c6930
+size 212263301
20240508-stage1-openpg-nopaf/pose_guider-68579.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb06215f0e5c360108d597cdbb069d414c8520909b800f548dd7e1f449fbe21
+size 212263301
20240508-stage1-openpg-nopaf/reference_unet-65381.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42ded4e21f6a816dc1520f8d17ab9aebfe6fe274462a3e22d4c9de156da7c78a
+size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-66980.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6cb760f41afd9af4aba70e3baecad276883c3047f1ffe435a12b780d7054df5
+size 3438323817
20240508-stage1-openpg-nopaf/reference_unet-68579.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64466c7b2fc2d6d3a180e33cd392f33edda3c1e9223b9dc824a324d9f722cd75
+size 3438323817
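The per-step .pth triples (denoising_unet, reference_unet, pose_guider) are what the stage-2 configs point at via stage1_ckpt_dir / stage1_ckpt_step. A sketch of collecting one triple, assuming each .pth holds a bare state_dict (which the identical file sizes across steps suggest):

# Sketch: gathering the stage-1 weights that stage 2 resumes from.
import os
import torch

ckpt_dir = "/workspace/camus/train/20240508-stage1-openpg-nopaf"
step = 68579  # one of the saved steps above

weights = {
    name: torch.load(os.path.join(ckpt_dir, f"{name}-{step}.pth"), map_location="cpu")
    for name in ("denoising_unet", "reference_unet", "pose_guider")
}
print({k: len(v) for k, v in weights.items()})  # tensor count per component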
20240510-stage1-9k/config.yaml
ADDED
@@ -0,0 +1,105 @@
+base_model_path: ./pretrained_weights/sd-image-variations-diffusers
+checkpointing_steps: 1000
+controlnet_openpose_path: ./pretrained_weights/control_v11p_sd15_openpose/diffusion_pytorch_model.bin
+data:
+  crop_scale:
+  - 0.8
+  - 1.2
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  - /workspace/develop/video/data/20240321/meta.json
+  - /workspace/develop/video/data/20240327/meta.json
+  - /workspace/develop/video/data/20240506/meta.json
+  - /workspace/develop/video/data/20240509/meta.json
+  ref_augment:
+    downsample:
+      min_scale_logit: -1.2
+      p: 0.3
+    pan:
+    - 0.04
+    - 0.02
+    rotate: 8
+    scale:
+    - 0.9
+    - 1.2
+  sample_margin: 30
+  train_bs: 4
+  train_height: 1152
+  train_width: 768
+enable_zero_snr: true
+exp_name: stage1-9k
+freeze_denoise: false
+freeze_reference: false
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: scaled_linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+openpose_guider:
+  enable: false
+output_dir: /workspace/camus/train
+pose_guider_pretrain: true
+resume_from_checkpoint: ''
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: false
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 100000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: false
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 1000
+validation:
+  metric:
+    batch_size: 4
+    generated_frames:
+    - 16
+    - 45
+    - 98
+    - 150
+    - 188
+    - 220
+    - 268
+    - 300
+    guidance_scale: 1.9
+    ref_frame: 28
+    seed: 42
+    size:
+    - 640
+    - 960
+    steps: 20
+    videos:
+    - configs/inference/metric/91HzMhq7eOS.mp4
+    - configs/inference/metric/A1T-Ea-FlQS.mp4
+    - configs/inference/metric/A1ubDo0PbQS.mp4
+    - configs/inference/metric/A1YNmKj0sCS.mp4
+  pose_image_paths:
+  - configs/inference/pose_images/A1eEZvfJRUS/frame70.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame150.png
+  - configs/inference/pose_images/A1eEZvfJRUS/frame190.png
+  ref_image_paths:
+  - configs/inference/ref_images/anyone-1.png
+  - configs/inference/ref_images/anyone-2.png
+  - configs/inference/ref_images/anyone-3.png
+  - configs/inference/ref_images/anyone-11.png
+weight_dtype: fp16
20240513-stage2-9k/config.yaml
ADDED
@@ -0,0 +1,99 @@
+base_model_path: ./pretrained_weights/stable-diffusion-v1-5
+checkpointing_steps: 2000
+data:
+  crop_scale:
+  - 1
+  - 1
+  do_center_crop: false
+  meta_paths:
+  - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
+  - /workspace/develop/video/data/tiktok-dance/good-meta.json
+  - /workspace/develop/video/data/20240321/meta.json
+  - /workspace/develop/video/data/20240327/meta.json
+  - /workspace/develop/video/data/20240506/meta.json
+  - /workspace/develop/video/data/20240509/meta.json
+  n_sample_frames: 24
+  ref_augment:
+    pan:
+    - 0.04
+    - 0.04
+    rotate: 2
+    scale:
+    - 0.9
+    - 1.2
+  sample_rate: 4
+  train_bs: 1
+  train_height: 960
+  train_width: 640
+enable_zero_snr: true
+exp_name: stage2-9k
+image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
+mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
+noise_offset: 0.05
+noise_scheduler_kwargs:
+  beta_end: 0.012
+  beta_schedule: linear
+  beta_start: 0.00085
+  clip_sample: false
+  num_train_timesteps: 1000
+  steps_offset: 1
+output_dir: /workspace/camus/train
+resume_from_checkpoint: ''
+save_dir: /workspace/camus/train/20240513-stage2-9k
+save_model_epoch_interval: 1
+seed: 12580
+snr_gamma: 5.0
+solver:
+  adam_beta1: 0.9
+  adam_beta2: 0.999
+  adam_epsilon: 1.0e-08
+  adam_weight_decay: 0.01
+  enable_xformers_memory_efficient_attention: true
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: true
+  learning_rate: 1.0e-05
+  lr_scheduler: constant
+  lr_warmup_steps: 1
+  max_grad_norm: 1.0
+  max_train_steps: 160000
+  mixed_precision: fp16
+  scale_lr: false
+  use_8bit_adam: true
+stage1_ckpt_dir: /workspace/camus/train/20240510-stage1-9k
+stage1_ckpt_step: 75392
+uncond_ratio: 0.1
+vae_model_path: ./pretrained_weights/sd-vae-ft-mse
+val:
+  validation_steps: 1000
+validation:
+  metric:
+    generate_frame_range:
+    - 30
+    - 54
+    guidance_scale: 2.8
+    ref_frame: 29
+    seed: 42
+    steps: 30
+    videos:
+    - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
+    - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
+    - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
+    - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
+  pose_range:
+  - 0
+  - 24
+  test_cases:
+  - - ./configs/inference/ref_images/anyone-3.png
+    - ./configs/inference/pose_videos/demo18.mp4
+  - - ./configs/inference/ref_images/anyone-3-partial.png
+    - ./configs/inference/pose_videos/demo6.mp4
+  - - ./configs/inference/ref_images/anyone-2.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-1.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-5.png
+    - ./configs/inference/pose_videos/demo11.mp4
+  - - ./configs/inference/ref_images/anyone-11.png
+    - ./configs/inference/pose_videos/demo15.mp4
+  uniform_along_time: false
+weight_dtype: fp16
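With ten near-identical configs in one commit, the experiment-to-experiment deltas (datasets, crop_scale, guidance_scale, openpose_guider settings) are easiest to see by diffing two files; a small helper sketch:

# Sketch: surface the keys that differ between two experiment configs.
from omegaconf import OmegaConf

def diff(x, y, prefix=""):
    for k in sorted(set(x) | set(y)):
        xv, yv = x.get(k), y.get(k)
        if isinstance(xv, dict) and isinstance(yv, dict):
            diff(xv, yv, f"{prefix}{k}.")
        elif xv != yv:
            print(f"{prefix}{k}: {xv!r} -> {yv!r}")

a = OmegaConf.to_container(OmegaConf.load("20240510-stage1-9k/config.yaml"))
b = OmegaConf.to_container(OmegaConf.load("20240513-stage2-9k/config.yaml"))
diff(a, b)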