This is the Qwen-Image transformer with the 8-step Lightning LoRA fused into it.
Original model: Qwen-Image
I'm using this repository for my own testing, so keep that in mind: this is not an official way to use the model.
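For reference, a fused checkpoint like this one can be produced roughly as follows. This is a minimal sketch, not the exact script used for this repo; the LoRA repository and weight filename below are assumptions and may need adjusting:

import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained("Qwen/Qwen-Image", torch_dtype=torch.bfloat16)

# Assumed location of the 8-step Lightning LoRA; verify the repo and filename.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.0.safetensors",
)

# Merge the LoRA weights into the base weights, then drop the LoRA state.
pipe.fuse_lora()
pipe.unload_lora_weights()

# Save only the fused transformer (this is what gets converted to GGUF).
pipe.transformer.save_pretrained("qwen-image-lightning-8steps-fused")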
How to test (24GB GPU)
Install diffusers from main (GGUF loading also requires the gguf package):
pip install git+https://github.com/huggingface/diffusers
pip install -U gguf
import torch
from diffusers import DiffusionPipeline, GGUFQuantizationConfig, QwenImageTransformer2DModel

torch_dtype = torch.bfloat16
model_id = "Qwen/Qwen-Image"

# Load the fused transformer from the Q4_K_S GGUF file in this repo,
# dequantizing on the fly to bfloat16 for compute.
transformer = QwenImageTransformer2DModel.from_single_file(
    "https://huggingface.co/OzzyGT/qwen-image-lighting-gguf/blob/main/qwen-image-lighting-Q4_K_S.gguf",
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch_dtype),
    torch_dtype=torch_dtype,
    config="Qwen/Qwen-Image",
    subfolder="transformer",
)

# Build the pipeline around the quantized transformer and offload idle
# components to the CPU to stay within 24GB of VRAM.
pipe = DiffusionPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch_dtype)
pipe.enable_model_cpu_offload()

prompt = "stock photo of two people, a man and a woman, wearing lab coats writing on a white board with markers, the white board has text that reads 'The Diffusers library by Hugging Face makes it easy for developers to run image generation and inference using state-of-the-art diffusion models with just a few lines of code' with sloppy writing and traces clearly made by a human. The photo is taken from the side and has depth of field so some parts of the board looks blurred giving it a more professional look"

generator = torch.Generator(device="cuda").manual_seed(42)

# The fused Lightning LoRA is distilled for few-step sampling, so 8 steps
# with true_cfg_scale=1.0 (no classifier-free guidance) are enough.
image = pipe(
    prompt=prompt,
    negative_prompt="",
    width=1664,
    height=928,
    num_inference_steps=8,
    true_cfg_scale=1.0,
    generator=generator,
).images[0]
image.save("gguf_lighting_qwen.png")
Result
How to test (16GB GPU)
Install diffusers from main (this example also needs the gguf and bitsandbytes packages):
pip install git+https://github.com/huggingface/diffusers
pip install -U gguf bitsandbytes
import torch
from transformers import BitsAndBytesConfig as TransformersBitsAndBytesConfig
from transformers import Qwen2_5_VLForConditionalGeneration
from diffusers import DiffusionPipeline, GGUFQuantizationConfig, QwenImageTransformer2DModel

torch_dtype = torch.bfloat16
model_id = "Qwen/Qwen-Image"

# Same GGUF transformer as in the 24GB example.
transformer = QwenImageTransformer2DModel.from_single_file(
    "https://huggingface.co/OzzyGT/qwen-image-lighting-gguf/blob/main/qwen-image-lighting-Q4_K_S.gguf",
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch_dtype),
    torch_dtype=torch_dtype,
    config="Qwen/Qwen-Image",
    subfolder="transformer",
)

# Additionally quantize the text encoder to 4-bit NF4 with bitsandbytes,
# since it is the other large component of the pipeline.
quantization_config = TransformersBitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    model_id,
    subfolder="text_encoder",
    quantization_config=quantization_config,
    torch_dtype=torch_dtype,
)

# Move the quantized text encoder to the CPU so that model offloading
# manages it together with the rest of the pipeline.
text_encoder = text_encoder.to("cpu")

pipe = DiffusionPipeline.from_pretrained(
    model_id, transformer=transformer, text_encoder=text_encoder, torch_dtype=torch_dtype
)
pipe.enable_model_cpu_offload()

prompt = "stock photo of two people, a man and a woman, wearing lab coats writing on a white board with markers, the white board has text that reads 'The Diffusers library by Hugging Face makes it easy for developers to run image generation and inference using state-of-the-art diffusion models with just a few lines of code' with sloppy writing and traces clearly made by a human. The photo is taken from the side and has depth of field so some parts of the board looks blurred giving it a more professional look"

generator = torch.Generator(device="cuda").manual_seed(42)
image = pipe(
    prompt=prompt,
    negative_prompt="",
    width=1664,
    height=928,
    num_inference_steps=8,
    true_cfg_scale=1.0,
    generator=generator,
).images[0]
image.save("gguf_lighting_qwen.png")
Result