This repo contains the default image preprocessing code for the Wan 2.2 5B I2V pipeline.
It loads an image from a URL, then resizes and center-crops it to a resolution the model can consume:
```py
from diffusers import ModularPipeline

image_processor = ModularPipeline.from_pretrained("YiYiXu/WanImageProcessor", trust_remote_code=True)
image = image_processor(
    image="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG",
    max_area=1280 * 704,
    output="processed_image",
)
```
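The returned `processed_image` is a PIL image sized for the model. A minimal sketch of feeding it to the pipeline downstream, assuming you use diffusers' `WanImageToVideoPipeline` (the model ID, prompt, and fps below are assumptions, not part of this repo):

```py
import torch
from diffusers import WanImageToVideoPipeline
from diffusers.utils import export_to_video

# model ID is an assumption; substitute the Wan 2.2 5B checkpoint you actually use
pipe = WanImageToVideoPipeline.from_pretrained(
    "Wan-AI/Wan2.2-TI2V-5B-Diffusers", torch_dtype=torch.bfloat16
).to("cuda")

width, height = image.size  # dimensions picked by the processor
frames = pipe(
    image=image,
    prompt="your prompt here",
    width=width,
    height=height,
).frames[0]
export_to_video(frames, "output.mp4", fps=24)  # assumed fps; adjust for your setup
```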
Under the hood, it does this:
```py
from PIL import Image
from diffusers.utils import load_image

# copied from https://github.com/Wan-Video/Wan2.2/blob/388807310646ed5f318a99f8e8d9ad28c5b65373/wan/utils/utils.py#L136
def best_output_size(w, h, dw, dh, expected_area):
    # ideal (float) output size that preserves the input aspect ratio
    ratio = w / h
    ow = (expected_area * ratio) ** 0.5
    oh = expected_area / ow

    # candidate 1: snap width down to a multiple of dw, then derive height
    ow1 = int(ow // dw * dw)
    oh1 = int(expected_area / ow1 // dh * dh)
    assert ow1 % dw == 0 and oh1 % dh == 0 and ow1 * oh1 <= expected_area
    ratio1 = ow1 / oh1

    # candidate 2: snap height down to a multiple of dh, then derive width
    oh2 = int(oh // dh * dh)
    ow2 = int(expected_area / oh2 // dw * dw)
    assert oh2 % dh == 0 and ow2 % dw == 0 and ow2 * oh2 <= expected_area
    ratio2 = ow2 / oh2

    # keep the candidate whose aspect ratio is closer to the original
    if max(ratio / ratio1, ratio1 / ratio) < max(ratio / ratio2, ratio2 / ratio):
        return ow1, oh1
    else:
        return ow2, oh2

image = load_image(
    "https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG"
).convert("RGB")
max_area = 1280 * 704
ih, iw = image.height, image.width

# both output dimensions must be multiples of patch_size * vae_scale_factor_spatial;
# inside the pipeline these are read off the loaded models:
#   dh = pipe.transformer.config.patch_size[1] * pipe.vae_scale_factor_spatial
#   dw = pipe.transformer.config.patch_size[2] * pipe.vae_scale_factor_spatial
# which for the Wan 2.2 5B pipeline works out to 32 in each dimension
dh, dw = 32, 32

ow, oh = best_output_size(iw, ih, dw, dh, max_area)

# resize so the image fully covers the target size, then center-crop the excess
scale = max(ow / iw, oh / ih)
resized_image = image.resize((round(iw * scale), round(ih * scale)), Image.LANCZOS)
x1 = (resized_image.width - ow) // 2
y1 = (resized_image.height - oh) // 2
image = resized_image.crop((x1, y1, x1 + ow, y1 + oh))
```
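For intuition, a small worked example (the 1024×768 input size is made up for illustration): with `max_area = 1280*704` and `dh = dw = 32`, the function picks 1088×800, the 32-aligned size under the area budget whose aspect ratio stays closest to the input's 4:3.

```py
ow, oh = best_output_size(1024, 768, 32, 32, 1280 * 704)
assert (ow, oh) == (1088, 800)
assert ow % 32 == 0 and oh % 32 == 0  # aligned to the model's spatial granularity
assert ow * oh <= 1280 * 704          # never exceeds the requested area
```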