|
This repo contains the default image preprocessing code for the Wan 2.2 5B image-to-video (I2V) pipeline.

The snippet below loads an image from a URL, then resizes and center-crops it to fit the target area:
|
```py
from diffusers import ModularPipeline

image_processor = ModularPipeline.from_pretrained("YiYiXu/WanImageProcessor", trust_remote_code=True)

image = image_processor(
    image="https://huggingface.co/datasets/YiYiXu/testing-images/resolve/main/wan_i2v_input.JPG",
    max_area=1280 * 704,
    output="processed_image",
)
```
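As a quick sanity check, assuming the `processed_image` output comes back as a PIL image (the block's internal code, shown below, operates on PIL images), its dimensions should be multiples of the transformer patch size times the VAE spatial scale factor, and its area should not exceed `max_area`:

```py
# assumes a PIL image; for Wan 2.2 5B, patch size 2 x VAE spatial scale 16
# means both sides should be multiples of 32
w, h = image.size
assert w % 32 == 0 and h % 32 == 0
assert w * h <= 1280 * 704
print(f"processed to {w}x{h}")
```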
|
Under the hood, the processor does the following:
|
```py
from PIL import Image

from diffusers.utils import load_image


# copied from https://github.com/Wan-Video/Wan2.2/blob/388807310646ed5f318a99f8e8d9ad28c5b65373/wan/utils/utils.py#L136
def best_output_size(w, h, dw, dh, expected_area):
    # ideal (floating-point) output size with the input aspect ratio
    ratio = w / h
    ow = (expected_area * ratio) ** 0.5
    oh = expected_area / ow

    # candidate 1: snap the width down to a multiple of dw first
    ow1 = int(ow // dw * dw)
    oh1 = int(expected_area / ow1 // dh * dh)
    assert ow1 % dw == 0 and oh1 % dh == 0 and ow1 * oh1 <= expected_area
    ratio1 = ow1 / oh1

    # candidate 2: snap the height down to a multiple of dh first
    oh2 = int(oh // dh * dh)
    ow2 = int(expected_area / oh2 // dw * dw)
    assert oh2 % dh == 0 and ow2 % dw == 0 and ow2 * oh2 <= expected_area
    ratio2 = ow2 / oh2

    # return the candidate whose aspect ratio is closer to the input's
    if max(ratio / ratio1, ratio1 / ratio) < max(ratio / ratio2, ratio2 / ratio):
        return ow1, oh1
    else:
        return ow2, oh2


# `block_state.image` (the `image` input) and `pipe` (the loaded Wan pipeline)
# come from the modular pipeline block context
image = load_image(block_state.image).convert("RGB")
max_area = 1280 * 704

ih, iw = image.height, image.width
# both output dimensions must be divisible by patch size * VAE spatial scale
dh = pipe.transformer.config.patch_size[1] * pipe.vae_scale_factor_spatial
dw = pipe.transformer.config.patch_size[2] * pipe.vae_scale_factor_spatial
ow, oh = best_output_size(iw, ih, dw, dh, max_area)

# scale with max() so the resized image fully covers the target size,
# then center-crop away the overhang
scale = max(ow / iw, oh / ih)
resized_image = image.resize((round(iw * scale), round(ih * scale)), Image.LANCZOS)

x1 = (resized_image.width - ow) // 2
y1 = (resized_image.height - oh) // 2
image = resized_image.crop((x1, y1, x1 + ow, y1 + oh))
```
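For intuition, here is the size `best_output_size` picks for a 4:3 input, assuming `dw = dh = 32` (patch size 2 times a 16x VAE spatial scale factor, which is what the Wan 2.2 5B config works out to):

```py
# 1024x768 input, max_area = 1280 * 704 = 901120
# width-first candidate:  (1088, 800), ratio 1.36
# height-first candidate: (1120, 800), ratio 1.40
# the input ratio is ~1.33, so the width-first candidate is closer and wins
print(best_output_size(1024, 768, 32, 32, 1280 * 704))  # (1088, 800)
```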