Checkpoint at 250 steps

Browse files

Files changed (10) hide show

.gitattributes +1 -0
README.md +133 -3
assets/image_0_0.png +3 -0
optimizer.bin +3 -0
pytorch_lora_weights.safetensors +3 -0
random_states_0.pkl +3 -0
scheduler.bin +3 -0
simpletuner_config.json +318 -0
training_state-dreambooth-1024.json +1 -0
training_state.json +1 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,3 +1,133 @@
----
-license: apache-2.0
----

+---
+license: other
+base_model: "Qwen/Qwen-Image"
+tags:
+  - qwen_image
+  - qwen_image-diffusers
+  - text-to-image
+  - image-to-image
+  - diffusers
+  - simpletuner
+  - not-for-all-audiences
+  - lora
+  - template:sd-lora
+  - standard
+pipeline_tag: text-to-image
+inference: true
+widget:
+- text: 'An domokun in minecraft style.'
+  parameters:
+    negative_prompt: 'ugly, cropped, blurry, low-quality, mediocre average'
+  output:
+    url: ./assets/image_0_0.png
+---
+# simpletuner-example-qwen_image-peft-lora
+This is a PEFT LoRA derived from [Qwen/Qwen-Image](https://huggingface.co/Qwen/Qwen-Image).
+The main validation prompt used during training was:
+```
+An domokun in minecraft style.
+```
+## Validation settings
+- CFG: `4.0`
+- CFG Rescale: `0.0`
+- Steps: `30`
+- Sampler: `FlowMatchEulerDiscreteScheduler`
+- Seed: `42`
+- Resolution: `1024x1024`
+Note: The validation settings are not necessarily the same as the [training settings](#training-settings).
+You can find some example images in the following gallery:
+<Gallery />
+The text encoder **was not** trained.
+You may reuse the base model text encoder for inference.
+## Training settings
+- Training epochs: 9
+- Training steps: 250
+- Learning rate: 0.0001
+  - Learning rate schedule: constant_with_warmup
+  - Warmup steps: 100
+- Max grad value: 0.01
+- Effective batch size: 1
+  - Micro-batch size: 1
+  - Gradient accumulation steps: 1
+  - Number of GPUs: 1
+- Gradient checkpointing: True
+- Prediction type: flow_matching
+- Optimizer: optimi-lion
+- Trainable parameter precision: Pure BF16
+- Base model precision: `int8-quanto`
+- Caption dropout probability: 0.0%
+- LoRA Rank: 8
+- LoRA Alpha: 8.0
+- LoRA Dropout: 0.1
+- LoRA initialisation style: default
+- LoRA mode: Standard
+## Datasets
+### dreambooth-1024
+- Repeats: 0
+- Total number of images: 26
+- Total number of aspect buckets: 1
+- Resolution: 1.048576 megapixels
+- Cropped: True
+- Crop style: random
+- Crop aspect: square
+- Used for regularisation data: No
+## Inference
+```python
+import torch
+from diffusers import DiffusionPipeline
+model_id = 'Qwen/Qwen-Image'
+adapter_id = 'simpletuner-example-qwen_image-peft-lora'
+pipeline = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.bfloat16) # loading directly in bf16
+pipeline.load_lora_weights(adapter_id)
+prompt = "An domokun in minecraft style."
+negative_prompt = 'ugly, cropped, blurry, low-quality, mediocre average'
+## Optional: quantise the model to save on vram.
+## Note: The model was quantised during training, and so it is recommended to do the same during inference time.
+from optimum.quanto import quantize, freeze, qint8
+quantize(pipeline.transformer, weights=qint8)
+freeze(pipeline.transformer)
+pipeline.to('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu') # the pipeline is already in its target precision level
+model_output = pipeline(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    num_inference_steps=30,
+    generator=torch.Generator(device='cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu').manual_seed(42),
+    width=1024,
+    height=1024,
+    guidance_scale=4.0,
+).images[0]
+model_output.save("output.png", format="PNG")
+```

assets/image_0_0.png ADDED Viewed

Git LFS Details

SHA256: 5a59bd90cf08f7bfba3620f4a1a1bb1a29cb2716447fc5e710386ed69f78d836
Pointer size: 132 Bytes
Size of remote file: 2.38 MB

optimizer.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:795d78330cb4804f1c740618f5d8cb601b17e16cc8884c9d47fb677c50a6d7ff
+size 47467915

pytorch_lora_weights.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:624788e67f07796cfb5e74e6735dd2150099239b0f8249d3d4b19ebeabae1b15
+size 23655824

random_states_0.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c5b31014e0091952ce994169c3eadc8fb6e327066ab3c61b994cd6ada6888c89
+size 14757

scheduler.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db38142dfbc297aaf07b39e2404980521f44adccd3b871d12fb14d2f143a693e
+size 1401

simpletuner_config.json ADDED Viewed

	@@ -0,0 +1,318 @@

+{
+    "snr_gamma": null,
+    "use_soft_min_snr": false,
+    "soft_min_snr_sigma_data": null,
+    "model_family": "qwen_image",
+    "model_flavour": "v1.0",
+    "model_type": "lora",
+    "loss_type": "l2",
+    "huber_schedule": "snr",
+    "huber_c": 0.1,
+    "hidream_use_load_balancing_loss": false,
+    "hidream_load_balancing_loss_weight": null,
+    "flux_lora_target": "all",
+    "flow_sigmoid_scale": 1.0,
+    "flux_fast_schedule": false,
+    "flow_use_uniform_schedule": false,
+    "flow_use_beta_schedule": false,
+    "flow_beta_schedule_alpha": 2.0,
+    "flow_beta_schedule_beta": 2.0,
+    "flow_schedule_shift": 0.0,
+    "flow_schedule_auto_shift": true,
+    "flux_guidance_mode": "constant",
+    "flux_guidance_value": 1.0,
+    "flux_guidance_min": 0.0,
+    "flux_guidance_max": 4.0,
+    "flux_attention_masked_training": false,
+    "ltx_train_mode": "i2v",
+    "ltx_i2v_prob": 0.1,
+    "ltx_protect_first_frame": false,
+    "ltx_partial_noise_fraction": 0.05,
+    "t5_padding": "unmodified",
+    "sd3_clip_uncond_behaviour": "empty_string",
+    "sd3_t5_uncond_behaviour": null,
+    "lora_type": "standard",
+    "peft_lora_mode": "standard",
+    "singlora_ramp_up_steps": 0,
+    "lora_init_type": "default",
+    "init_lora": null,
+    "lora_rank": 8,
+    "lora_alpha": 8.0,
+    "lora_dropout": 0.1,
+    "lycoris_config": "config/lycoris_config.json",
+    "init_lokr_norm": null,
+    "conditioning_multidataset_sampling": "random",
+    "control": false,
+    "controlnet": false,
+    "controlnet_custom_config": null,
+    "tread_config": null,
+    "controlnet_model_name_or_path": null,
+    "pretrained_model_name_or_path": "Qwen/Qwen-Image",
+    "pretrained_transformer_model_name_or_path": null,
+    "pretrained_transformer_subfolder": "transformer",
+    "pretrained_unet_model_name_or_path": null,
+    "pretrained_unet_subfolder": "unet",
+    "pretrained_vae_model_name_or_path": "Qwen/Qwen-Image",
+    "pretrained_t5_model_name_or_path": null,
+    "prediction_type": "flow_matching",
+    "snr_weight": 1.0,
+    "training_scheduler_timestep_spacing": "trailing",
+    "inference_scheduler_timestep_spacing": "trailing",
+    "refiner_training": false,
+    "refiner_training_invert_schedule": false,
+    "refiner_training_strength": 0.2,
+    "timestep_bias_strategy": "none",
+    "timestep_bias_multiplier": 1.0,
+    "timestep_bias_begin": 0,
+    "timestep_bias_end": 1000,
+    "timestep_bias_portion": 0.25,
+    "disable_segmented_timestep_sampling": false,
+    "rescale_betas_zero_snr": false,
+    "vae_dtype": "bf16",
+    "vae_batch_size": 1,
+    "vae_enable_tiling": false,
+    "vae_enable_slicing": false,
+    "vae_cache_scan_behaviour": "recreate",
+    "vae_cache_ondemand": false,
+    "compress_disk_cache": false,
+    "aspect_bucket_disable_rebuild": false,
+    "keep_vae_loaded": false,
+    "skip_file_discovery": "",
+    "revision": null,
+    "variant": null,
+    "preserve_data_backend_cache": false,
+    "use_dora": false,
+    "override_dataset_config": false,
+    "cache_dir_text": "cache",
+    "cache_dir_vae": "",
+    "data_backend_config": "config/examples/multidatabackend-small-dreambooth-1024px.json",
+    "data_backend_sampling": "auto-weighting",
+    "ignore_missing_files": false,
+    "write_batch_size": 128,
+    "read_batch_size": 25,
+    "image_processing_batch_size": 32,
+    "enable_multiprocessing": false,
+    "max_workers": 32,
+    "aws_max_pool_connections": 128,
+    "torch_num_threads": 8,
+    "dataloader_prefetch": false,
+    "dataloader_prefetch_qlen": 10,
+    "aspect_bucket_worker_count": 12,
+    "cache_dir": "output/examples/qwen_image.peft-lora/cache",
+    "cache_clear_validation_prompts": false,
+    "caption_strategy": "filename",
+    "parquet_caption_column": null,
+    "parquet_filename_column": null,
+    "instance_prompt": null,
+    "output_dir": "output/examples/qwen_image.peft-lora",
+    "seed": 42,
+    "seed_for_each_device": true,
+    "framerate": null,
+    "resolution": 1024.0,
+    "resolution_type": "pixel_area",
+    "aspect_bucket_rounding": null,
+    "aspect_bucket_alignment": 32,
+    "minimum_image_size": 0.0,
+    "maximum_image_size": null,
+    "target_downsample_size": null,
+    "train_text_encoder": false,
+    "tokenizer_max_length": null,
+    "train_batch_size": 1,
+    "num_train_epochs": 77,
+    "max_train_steps": 2000,
+    "ignore_final_epochs": true,
+    "checkpointing_steps": 50,
+    "checkpointing_rolling_steps": 0,
+    "checkpointing_use_tempdir": false,
+    "checkpoints_total_limit": 20,
+    "checkpoints_rolling_total_limit": 1,
+    "resume_from_checkpoint": null,
+    "gradient_accumulation_steps": 1,
+    "gradient_checkpointing": true,
+    "gradient_checkpointing_interval": null,
+    "learning_rate": 0.0001,
+    "text_encoder_lr": null,
+    "lr_scale": false,
+    "lr_scale_sqrt": false,
+    "lr_scheduler": "constant_with_warmup",
+    "lr_warmup_steps": 100,
+    "lr_num_cycles": 1,
+    "lr_power": 0.8,
+    "distillation_method": null,
+    "distillation_config": null,
+    "use_ema": false,
+    "ema_device": "cpu",
+    "ema_validation": "comparison",
+    "ema_cpu_only": false,
+    "ema_foreach_disable": false,
+    "ema_update_interval": null,
+    "ema_decay": 0.995,
+    "non_ema_revision": null,
+    "offload_during_startup": false,
+    "offload_param_path": null,
+    "optimizer": "optimi-lion",
+    "optimizer_config": null,
+    "optimizer_cpu_offload_method": "none",
+    "optimizer_offload_gradients": false,
+    "fuse_optimizer": false,
+    "optimizer_beta1": null,
+    "optimizer_beta2": null,
+    "optimizer_release_gradients": false,
+    "adam_beta1": 0.9,
+    "adam_beta2": 0.999,
+    "adam_weight_decay": 0.01,
+    "adam_epsilon": 1e-08,
+    "prodigy_steps": null,
+    "max_grad_norm": 0.01,
+    "grad_clip_method": "value",
+    "push_to_hub": false,
+    "push_checkpoints_to_hub": false,
+    "hub_model_id": "simpletuner-example-qwen_image-peft-lora",
+    "model_card_note": null,
+    "model_card_safe_for_work": false,
+    "logging_dir": "output/examples/qwen_image.peft-lora/logs",
+    "disable_benchmark": false,
+    "evaluation_type": null,
+    "eval_dataset_pooling": false,
+    "pretrained_evaluation_model_name_or_path": "openai/clip-vit-large-patch14-336",
+    "validation_on_startup": false,
+    "validation_seed_source": "cpu",
+    "validation_lycoris_strength": 1.0,
+    "validation_torch_compile": false,
+    "validation_torch_compile_mode": "max-autotune",
+    "validation_guidance_skip_layers": null,
+    "validation_guidance_skip_layers_start": 0.01,
+    "validation_guidance_skip_layers_stop": 0.2,
+    "validation_guidance_skip_scale": 2.8,
+    "sana_complex_human_instruction": [
+        "Given a user prompt, generate an 'Enhanced prompt' that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:",
+        "- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.",
+        "- Do not add detail that is not in the original prompt! Do not mention colors if the User Prompt does not!",
+        "- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.",
+        "- If the prompt contains explicit material, do not remove this information from the output, which would damage the performance of downstream classifiers.",
+        "Here are examples of how to transform or refine prompts:",
+        "- User Prompt: A cat sleeping -> Enhanced: A cat sleeping peacefully, showcasing the joy of pet ownership. Cute floof kitty cat gatto.",
+        "- User Prompt: A busy city street -> Enhanced: A bustling city street scene featuring a crowd of people.",
+        "Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:",
+        "User Prompt: "
+    ],
+    "disable_tf32": false,
+    "validation_using_datasets": null,
+    "webhook_config": null,
+    "webhook_reporting_interval": null,
+    "report_to": "none",
+    "tracker_run_name": "example-training-run",
+    "tracker_project_name": "lora-training",
+    "tracker_image_layout": "gallery",
+    "validation_prompt": "An domokun in minecraft style.",
+    "validation_prompt_library": false,
+    "user_prompt_library": null,
+    "validation_negative_prompt": "ugly, cropped, blurry, low-quality, mediocre average",
+    "num_validation_images": 1,
+    "validation_disable": false,
+    "validation_steps": 50,
+    "validation_stitch_input_location": "left",
+    "eval_steps_interval": null,
+    "eval_timesteps": 28,
+    "num_eval_images": 25,
+    "eval_dataset_id": null,
+    "validation_num_inference_steps": 30,
+    "validation_num_video_frames": null,
+    "validation_resolution": "1024x1024",
+    "validation_noise_scheduler": null,
+    "validation_disable_unconditional": true,
+    "enable_watermark": false,
+    "mixed_precision": "bf16",
+    "gradient_precision": null,
+    "quantize_via": "cpu",
+    "base_model_precision": "int8-quanto",
+    "quantize_activations": false,
+    "base_model_default_dtype": "bf16",
+    "text_encoder_1_precision": "no_change",
+    "text_encoder_2_precision": "no_change",
+    "text_encoder_3_precision": "no_change",
+    "text_encoder_4_precision": "no_change",
+    "local_rank": -1,
+    "fuse_qkv_projections": false,
+    "attention_mechanism": "diffusers",
+    "sageattention_usage": "inference",
+    "set_grads_to_none": false,
+    "noise_offset": 0.1,
+    "noise_offset_probability": 0.25,
+    "masked_loss_probability": 1.0,
+    "validation_guidance": 4.0,
+    "validation_guidance_real": 1.0,
+    "validation_no_cfg_until_timestep": 2,
+    "validation_guidance_rescale": 0.0,
+    "validation_randomize": false,
+    "validation_seed": 42,
+    "fully_unload_text_encoder": false,
+    "freeze_encoder_before": 12,
+    "freeze_encoder_after": 17,
+    "freeze_encoder_strategy": "after",
+    "layer_freeze_strategy": "none",
+    "unet_attention_slice": false,
+    "print_filenames": false,
+    "print_sampler_statistics": false,
+    "metadata_update_interval": 3600,
+    "debug_aspect_buckets": false,
+    "debug_dataset_loader": false,
+    "freeze_encoder": true,
+    "save_text_encoder": false,
+    "text_encoder_limit": 25,
+    "prepend_instance_prompt": false,
+    "only_instance_prompt": false,
+    "data_aesthetic_score": 7.0,
+    "sdxl_refiner_uses_full_range": false,
+    "caption_dropout_probability": 0.0,
+    "delete_unwanted_images": false,
+    "delete_problematic_images": false,
+    "disable_bucket_pruning": true,
+    "offset_noise": false,
+    "input_perturbation": 0.0,
+    "input_perturbation_steps": 0,
+    "lr_end": "4e-7",
+    "i_know_what_i_am_doing": false,
+    "accelerator_cache_clear_interval": null,
+    "vae_path": "Qwen/Qwen-Image",
+    "accelerator_project_config": {
+        "project_dir": "output/examples/qwen_image.peft-lora",
+        "logging_dir": "output/examples/qwen_image.peft-lora/logs",
+        "automatic_checkpoint_naming": false,
+        "total_limit": null,
+        "iteration": 5,
+        "save_on_each_node": false
+    },
+    "process_group_kwargs": {
+        "backend": "nccl",
+        "init_method": null,
+        "timeout": "1:30:00"
+    },
+    "is_quantized": true,
+    "weight_dtype": "torch.bfloat16",
+    "disable_accelerator": false,
+    "lora_initialisation_style": true,
+    "model_type_label": "Qwen-Image",
+    "use_deepspeed_optimizer": false,
+    "use_deepspeed_scheduler": false,
+    "base_weight_dtype": "torch.bfloat16",
+    "is_quanto": true,
+    "is_torchao": false,
+    "is_bnb": false,
+    "flow_matching": true,
+    "vae_kwargs": {
+        "pretrained_model_name_or_path": "Qwen/Qwen-Image",
+        "subfolder": "vae",
+        "revision": null,
+        "force_upcast": false,
+        "variant": null
+    },
+    "enable_adamw_bf16": true,
+    "overrode_max_train_steps": false,
+    "total_num_batches": 26,
+    "num_update_steps_per_epoch": 26,
+    "total_batch_size": 1,
+    "is_schedulefree": false,
+    "is_lr_scheduler_disabled": false,
+    "total_steps_remaining_at_start": 2000
+}

training_state-dreambooth-1024.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"aspect_ratio_bucket_indices": {"1.0": ["0.jpg", "1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg", "6.jpg", "7.jpg", "8.jpg", "9.jpg", "10.jpg", "11.jpg", "12.jpg", "13.jpg", "14.jpg", "15.jpg", "16.jpg", "17.jpg", "18.jpg", "19.jpg", "20.jpg", "21.jpg", "22.jpg", "23.jpg", "24.jpg", "25.jpg"]}, "buckets": ["1.0"], "exhausted_buckets": [], "batch_size": 1, "current_bucket": 0, "seen_images": {"4.jpg": true, "14.jpg": true, "6.jpg": true, "10.jpg": true, "18.jpg": true, "9.jpg": true, "2.jpg": true, "21.jpg": true, "25.jpg": true, "5.jpg": true, "1.jpg": true, "19.jpg": true, "16.jpg": true, "0.jpg": true, "7.jpg": true, "12.jpg": true}, "current_epoch": 10}

training_state.json ADDED Viewed

	@@ -0,0 +1 @@


+ {"global_step": 250, "epoch_step": 259, "epoch": 10, "exhausted_backends": [], "repeats": {"dreambooth-1024": 0}}