remove asymmetric
Browse files- README.md +0 -1
- asymmetric_vae/config.json +0 -38
- asymmetric_vae/diffusion_pytorch_model.safetensors +0 -3
- asymmetric_vae_new/config.json +0 -45
- asymmetric_vae_new/diffusion_pytorch_model.safetensors +0 -3
- eval_alchemist.py +129 -38
- samples/sample_0.jpg +0 -3
- samples/sample_1.jpg +0 -3
- samples/sample_2.jpg +0 -3
- samples/sample_decoded.jpg +0 -3
- samples/sample_real.jpg +0 -3
- train_sdxl_vae_gpt5.py → train_sdxl_vae.py +3 -3
- vae/diffusion_pytorch_model.safetensors +1 -1
README.md
CHANGED
|
@@ -24,7 +24,6 @@ Alchemist eval (512px)
|
|
| 24 |
| madebyollin/sdxl-vae-fp16 | 100% | 100% | 100% |
|
| 25 |
| KBlueLeaf/EQ-SDXL-VAE | 107.8% | 100.1% | 95.5% |
|
| 26 |
| AiArtLab/sdxl_vae | 112.3% | 101.8% | 106.6% |
|
| 27 |
-
| AiArtLab/sdxl_vae_asym | 111.7% | 101.1% | 89.4% |
|
| 28 |
| FLUX.1-schnell-vae | 324.0% | 119.8% | 292.0% |
|
| 29 |
|
| 30 |
[](https://imgsli.com/NDA3OTgz)
|
|
|
|
| 24 |
| madebyollin/sdxl-vae-fp16 | 100% | 100% | 100% |
|
| 25 |
| KBlueLeaf/EQ-SDXL-VAE | 107.8% | 100.1% | 95.5% |
|
| 26 |
| AiArtLab/sdxl_vae | 112.3% | 101.8% | 106.6% |
|
|
|
|
| 27 |
| FLUX.1-schnell-vae | 324.0% | 119.8% | 292.0% |
|
| 28 |
|
| 29 |
[](https://imgsli.com/NDA3OTgz)
|
asymmetric_vae/config.json
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_class_name": "AsymmetricAutoencoderKL",
|
| 3 |
-
"_diffusers_version": "0.34.0",
|
| 4 |
-
"_name_or_path": "asymmetric_vae_empty",
|
| 5 |
-
"act_fn": "silu",
|
| 6 |
-
"down_block_out_channels": [
|
| 7 |
-
128,
|
| 8 |
-
256,
|
| 9 |
-
512,
|
| 10 |
-
512
|
| 11 |
-
],
|
| 12 |
-
"down_block_types": [
|
| 13 |
-
"DownEncoderBlock2D",
|
| 14 |
-
"DownEncoderBlock2D",
|
| 15 |
-
"DownEncoderBlock2D",
|
| 16 |
-
"DownEncoderBlock2D"
|
| 17 |
-
],
|
| 18 |
-
"in_channels": 3,
|
| 19 |
-
"latent_channels": 4,
|
| 20 |
-
"layers_per_down_block": 2,
|
| 21 |
-
"layers_per_up_block": 3,
|
| 22 |
-
"norm_num_groups": 32,
|
| 23 |
-
"out_channels": 3,
|
| 24 |
-
"sample_size": 1024,
|
| 25 |
-
"scaling_factor": 0.13025,
|
| 26 |
-
"up_block_out_channels": [
|
| 27 |
-
128,
|
| 28 |
-
256,
|
| 29 |
-
512,
|
| 30 |
-
512
|
| 31 |
-
],
|
| 32 |
-
"up_block_types": [
|
| 33 |
-
"UpDecoderBlock2D",
|
| 34 |
-
"UpDecoderBlock2D",
|
| 35 |
-
"UpDecoderBlock2D",
|
| 36 |
-
"UpDecoderBlock2D"
|
| 37 |
-
]
|
| 38 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
asymmetric_vae/diffusion_pytorch_model.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:ded3c30322578e3371f32a58423b6a3be3a2c3b81d3eb5d35433772be796a1ba
|
| 3 |
-
size 421473052
|
|
|
|
|
|
|
|
|
|
|
|
asymmetric_vae_new/config.json
DELETED
|
@@ -1,45 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"_class_name": "AsymmetricAutoencoderKL",
|
| 3 |
-
"_diffusers_version": "0.35.0.dev0",
|
| 4 |
-
"_name_or_path": "asymmetric_vae",
|
| 5 |
-
"act_fn": "silu",
|
| 6 |
-
"block_out_channels": [
|
| 7 |
-
128,
|
| 8 |
-
256,
|
| 9 |
-
512,
|
| 10 |
-
512
|
| 11 |
-
],
|
| 12 |
-
"down_block_out_channels": [
|
| 13 |
-
128,
|
| 14 |
-
256,
|
| 15 |
-
512,
|
| 16 |
-
512
|
| 17 |
-
],
|
| 18 |
-
"down_block_types": [
|
| 19 |
-
"DownEncoderBlock2D",
|
| 20 |
-
"DownEncoderBlock2D",
|
| 21 |
-
"DownEncoderBlock2D",
|
| 22 |
-
"DownEncoderBlock2D"
|
| 23 |
-
],
|
| 24 |
-
"force_upcast": false,
|
| 25 |
-
"in_channels": 3,
|
| 26 |
-
"latent_channels": 4,
|
| 27 |
-
"layers_per_down_block": 2,
|
| 28 |
-
"layers_per_up_block": 3,
|
| 29 |
-
"norm_num_groups": 32,
|
| 30 |
-
"out_channels": 3,
|
| 31 |
-
"sample_size": 1024,
|
| 32 |
-
"scaling_factor": 0.13025,
|
| 33 |
-
"up_block_out_channels": [
|
| 34 |
-
128,
|
| 35 |
-
256,
|
| 36 |
-
512,
|
| 37 |
-
512
|
| 38 |
-
],
|
| 39 |
-
"up_block_types": [
|
| 40 |
-
"UpDecoderBlock2D",
|
| 41 |
-
"UpDecoderBlock2D",
|
| 42 |
-
"UpDecoderBlock2D",
|
| 43 |
-
"UpDecoderBlock2D"
|
| 44 |
-
]
|
| 45 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
asymmetric_vae_new/diffusion_pytorch_model.safetensors
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:df9380b1e8d8b1a36b3d0f9501a854717a911ae9b8d2aebe18809a6eefa9318b
|
| 3 |
-
size 421473052
|
|
|
|
|
|
|
|
|
|
|
|
eval_alchemist.py
CHANGED
|
@@ -5,29 +5,71 @@ import lpips
|
|
| 5 |
from PIL import Image, UnidentifiedImageError
|
| 6 |
from tqdm import tqdm
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
-
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop
|
| 9 |
from diffusers import AutoencoderKL, AsymmetricAutoencoderKL
|
| 10 |
import random
|
| 11 |
|
| 12 |
# --------------------------- Параметры ---------------------------
|
| 13 |
DEVICE = "cuda"
|
| 14 |
DTYPE = torch.float16
|
| 15 |
-
IMAGE_FOLDER = "/workspace/alchemist"
|
| 16 |
MIN_SIZE = 1280
|
| 17 |
CROP_SIZE = 512
|
| 18 |
-
BATCH_SIZE =
|
| 19 |
-
MAX_IMAGES =
|
| 20 |
-
NUM_WORKERS = 4
|
|
|
|
|
|
|
| 21 |
|
| 22 |
# Список VAE для тестирования
|
| 23 |
VAE_LIST = [
|
|
|
|
|
|
|
|
|
|
| 24 |
("madebyollin/sdxl-vae-fp16", AutoencoderKL, "madebyollin/sdxl-vae-fp16-fix", None),
|
| 25 |
-
("
|
| 26 |
("AiArtLab/sdxl_vae", AutoencoderKL, "AiArtLab/sdxl_vae", None),
|
| 27 |
-
("AiArtLab/sdxl_vae_asym", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae"),
|
| 28 |
-
("
|
|
|
|
|
|
|
| 29 |
]
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# --------------------------- Dataset ---------------------------
|
| 32 |
class ImageFolderDataset(Dataset):
|
| 33 |
def __init__(self, root_dir, extensions=('.png',), min_size=1024, crop_size=512, limit=None):
|
|
@@ -36,18 +78,15 @@ class ImageFolderDataset(Dataset):
|
|
| 36 |
self.crop_size = crop_size
|
| 37 |
self.paths = []
|
| 38 |
|
| 39 |
-
# Собираем пути к файлам
|
| 40 |
print("Сканирование папки...")
|
| 41 |
for root, _, files in os.walk(root_dir):
|
| 42 |
for fname in files:
|
| 43 |
if fname.lower().endswith(extensions):
|
| 44 |
self.paths.append(os.path.join(root, fname))
|
| 45 |
|
| 46 |
-
# Ограничение количества
|
| 47 |
if limit:
|
| 48 |
self.paths = self.paths[:limit]
|
| 49 |
|
| 50 |
-
# Быстрая проверка валидности (опционально, можно убрать для скорости)
|
| 51 |
print("Проверка изображений...")
|
| 52 |
valid = []
|
| 53 |
for p in tqdm(self.paths, desc="Проверка"):
|
|
@@ -62,11 +101,9 @@ class ImageFolderDataset(Dataset):
|
|
| 62 |
if len(self.paths) == 0:
|
| 63 |
raise RuntimeError(f"Не найдено валидных изображений в {root_dir}")
|
| 64 |
|
| 65 |
-
# Перемешиваем для случайности
|
| 66 |
random.shuffle(self.paths)
|
| 67 |
print(f"Найдено {len(self.paths)} изображений")
|
| 68 |
|
| 69 |
-
# Трансформации
|
| 70 |
self.transform = Compose([
|
| 71 |
Resize(min_size, interpolation=Image.LANCZOS),
|
| 72 |
CenterCrop(crop_size),
|
|
@@ -89,9 +126,14 @@ def process(x):
|
|
| 89 |
def deprocess(x):
|
| 90 |
return x * 0.5 + 0.5
|
| 91 |
|
|
|
|
|
|
|
|
|
|
| 92 |
# --------------------------- Основной код ---------------------------
|
| 93 |
if __name__ == "__main__":
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
dataset = ImageFolderDataset(
|
| 96 |
IMAGE_FOLDER,
|
| 97 |
extensions=('.png',),
|
|
@@ -103,16 +145,14 @@ if __name__ == "__main__":
|
|
| 103 |
dataloader = DataLoader(
|
| 104 |
dataset,
|
| 105 |
batch_size=BATCH_SIZE,
|
| 106 |
-
shuffle=False,
|
| 107 |
num_workers=NUM_WORKERS,
|
| 108 |
pin_memory=True,
|
| 109 |
drop_last=False
|
| 110 |
)
|
| 111 |
|
| 112 |
-
# Инициализация LPIPS
|
| 113 |
lpips_net = lpips.LPIPS(net="vgg").eval().to(DEVICE).requires_grad_(False)
|
| 114 |
|
| 115 |
-
# Загрузка VAE моделей
|
| 116 |
print("\nЗагрузка VAE моделей...")
|
| 117 |
vaes = []
|
| 118 |
names = []
|
|
@@ -120,67 +160,118 @@ if __name__ == "__main__":
|
|
| 120 |
for name, vae_class, model_path, subfolder in VAE_LIST:
|
| 121 |
try:
|
| 122 |
print(f" Загружаю {name}...")
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
vae = vae.to(DEVICE, DTYPE).eval()
|
| 125 |
vaes.append(vae)
|
| 126 |
names.append(name)
|
| 127 |
except Exception as e:
|
| 128 |
print(f" ❌ Ошибка загрузки {name}: {e}")
|
| 129 |
|
| 130 |
-
# Оценка метрик
|
| 131 |
print("\nОценка метрик...")
|
| 132 |
-
results = {name: {"mse": 0.0, "psnr": 0.0, "lpips": 0.0, "count": 0} for name in names}
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
| 134 |
with torch.no_grad():
|
|
|
|
| 135 |
for batch in tqdm(dataloader, desc="Обработка батчей"):
|
| 136 |
-
batch = batch.to(DEVICE)
|
| 137 |
-
test_inp = process(batch).to(DTYPE)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
| 141 |
latent = vae.encode(test_inp).latent_dist.mode()
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
| 145 |
for i in range(batch.shape[0]):
|
| 146 |
img_orig = batch[i:i+1]
|
| 147 |
img_recon = recon[i:i+1]
|
| 148 |
-
|
| 149 |
mse = F.mse_loss(img_orig, img_recon).item()
|
| 150 |
psnr = 10 * torch.log10(1 / torch.tensor(mse)).item()
|
| 151 |
lpips_val = lpips_net(img_orig, img_recon, normalize=True).mean().item()
|
| 152 |
-
|
| 153 |
results[name]["mse"] += mse
|
| 154 |
results[name]["psnr"] += psnr
|
| 155 |
results[name]["lpips"] += lpips_val
|
|
|
|
| 156 |
results[name]["count"] += 1
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
# Усреднение результатов
|
| 159 |
for name in names:
|
| 160 |
count = results[name]["count"]
|
| 161 |
results[name]["mse"] /= count
|
| 162 |
results[name]["psnr"] /= count
|
| 163 |
results[name]["lpips"] /= count
|
|
|
|
| 164 |
|
| 165 |
# Вывод абсолютных значений
|
| 166 |
print("\n=== Абсолютные значения ===")
|
| 167 |
for name in names:
|
| 168 |
-
print(f"{name:30s}: MSE: {results[name]['mse']:.3e}, PSNR: {results[name]['psnr']:.4f},
|
|
|
|
| 169 |
|
| 170 |
# Вывод таблицы с процентами
|
| 171 |
print("\n=== Сравнение с первой моделью (%) ===")
|
| 172 |
-
print(f"| {'Модель':30s} | {'MSE':>10s} | {'PSNR':>10s} | {'LPIPS':>10s} |")
|
| 173 |
-
print(f"|{'-'*32}|{'-'*12}|{'-'*12}|{'-'*12}|")
|
| 174 |
|
| 175 |
baseline = names[0]
|
| 176 |
for name in names:
|
|
|
|
| 177 |
mse_pct = (results[baseline]["mse"] / results[name]["mse"]) * 100
|
|
|
|
| 178 |
psnr_pct = (results[name]["psnr"] / results[baseline]["psnr"]) * 100
|
|
|
|
| 179 |
lpips_pct = (results[baseline]["lpips"] / results[name]["lpips"]) * 100
|
|
|
|
| 180 |
|
| 181 |
if name == baseline:
|
| 182 |
-
print(f"| {name:30s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} |")
|
| 183 |
else:
|
| 184 |
-
print(f"| {name:30s} | {f'{mse_pct:.1f}%':>10s} | {f'{psnr_pct:.1f}%':>10s} |
|
|
|
|
| 185 |
|
| 186 |
-
print("\n✅ Готово!")
|
|
|
|
| 5 |
from PIL import Image, UnidentifiedImageError
|
| 6 |
from tqdm import tqdm
|
| 7 |
from torch.utils.data import Dataset, DataLoader
|
| 8 |
+
from torchvision.transforms import Compose, Resize, ToTensor, CenterCrop,ToPILImage
|
| 9 |
from diffusers import AutoencoderKL, AsymmetricAutoencoderKL
|
| 10 |
import random
|
| 11 |
|
| 12 |
# --------------------------- Параметры ---------------------------
|
| 13 |
DEVICE = "cuda"
|
| 14 |
DTYPE = torch.float16
|
| 15 |
+
IMAGE_FOLDER = "/workspace/alchemist" #wget https://huggingface.co/datasets/AiArtLab/alchemist/resolve/main/alchemist.zip
|
| 16 |
MIN_SIZE = 1280
|
| 17 |
CROP_SIZE = 512
|
| 18 |
+
BATCH_SIZE = 5
|
| 19 |
+
MAX_IMAGES = 100
|
| 20 |
+
NUM_WORKERS = 4
|
| 21 |
+
NUM_SAMPLES_TO_SAVE = 10 # Сколько примеров сохранить (0 - не сохранять)
|
| 22 |
+
SAMPLES_FOLDER = "vaetest"
|
| 23 |
|
| 24 |
# Список VAE для тестирования
|
| 25 |
VAE_LIST = [
|
| 26 |
+
|
| 27 |
+
# ("stable-diffusion-v1-5/stable-diffusion-v1-5", AutoencoderKL, "stable-diffusion-v1-5/stable-diffusion-v1-5", "vae"),
|
| 28 |
+
# ("cross-attention/asymmetric-autoencoder-kl-x-1-5", AsymmetricAutoencoderKL, "cross-attention/asymmetric-autoencoder-kl-x-1-5", None),
|
| 29 |
("madebyollin/sdxl-vae-fp16", AutoencoderKL, "madebyollin/sdxl-vae-fp16-fix", None),
|
| 30 |
+
# ("AiArtLab/sdxs", AutoencoderKL, "AiArtLab/sdxs", "vae"),
|
| 31 |
("AiArtLab/sdxl_vae", AutoencoderKL, "AiArtLab/sdxl_vae", None),
|
| 32 |
+
# ("AiArtLab/sdxl_vae_asym", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae"),
|
| 33 |
+
("AiArtLab/sdxl_vae_asym_new", AsymmetricAutoencoderKL, "AiArtLab/sdxl_vae", "asymmetric_vae_new"),
|
| 34 |
+
# ("KBlueLeaf/EQ-SDXL-VAE", AutoencoderKL, "KBlueLeaf/EQ-SDXL-VAE", None),
|
| 35 |
+
# ("FLUX.1-schnell-vae", AutoencoderKL, "black-forest-labs/FLUX.1-schnell", "vae"),
|
| 36 |
]
|
| 37 |
|
| 38 |
+
# --------------------------- Sobel Edge Detection ---------------------------
|
| 39 |
+
# Определяем фильтры Собеля глобально
|
| 40 |
+
_sobel_kx = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32).view(1, 1, 3, 3)
|
| 41 |
+
_sobel_ky = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32).view(1, 1, 3, 3)
|
| 42 |
+
|
| 43 |
+
def sobel_edges(x: torch.Tensor) -> torch.Tensor:
|
| 44 |
+
"""
|
| 45 |
+
Вычисляет карту границ с помощью оператора Собеля
|
| 46 |
+
x: [B,C,H,W] в диапазоне [-1,1]
|
| 47 |
+
Возвращает: [B,C,H,W] - магнитуда градиента
|
| 48 |
+
"""
|
| 49 |
+
C = x.shape[1]
|
| 50 |
+
kx = _sobel_kx.to(x.device, x.dtype).repeat(C, 1, 1, 1)
|
| 51 |
+
ky = _sobel_ky.to(x.device, x.dtype).repeat(C, 1, 1, 1)
|
| 52 |
+
gx = F.conv2d(x, kx, padding=1, groups=C)
|
| 53 |
+
gy = F.conv2d(x, ky, padding=1, groups=C)
|
| 54 |
+
return torch.sqrt(gx * gx + gy * gy + 1e-12)
|
| 55 |
+
|
| 56 |
+
def compute_edge_loss(real: torch.Tensor, fake: torch.Tensor) -> float:
    """Return the mean absolute difference between two Sobel edge maps.

    Args:
        real: Reference images, [B, C, H, W], documented as lying in [0, 1].
        fake: Reconstructed images with the same layout and range.

    Returns:
        The L1 loss between the edge maps of ``fake`` and ``real`` as a
        plain Python float.
    """
    # Rescale each input from [0, 1] to [-1, 1] before edge extraction;
    # the reference image is processed first, then the reconstruction.
    reference_edges, reconstructed_edges = (
        sobel_edges(image * 2 - 1) for image in (real, fake)
    )
    edge_distance = F.l1_loss(reconstructed_edges, reference_edges)
    return edge_distance.item()
|
| 72 |
+
|
| 73 |
# --------------------------- Dataset ---------------------------
|
| 74 |
class ImageFolderDataset(Dataset):
|
| 75 |
def __init__(self, root_dir, extensions=('.png',), min_size=1024, crop_size=512, limit=None):
|
|
|
|
| 78 |
self.crop_size = crop_size
|
| 79 |
self.paths = []
|
| 80 |
|
|
|
|
| 81 |
print("Сканирование папки...")
|
| 82 |
for root, _, files in os.walk(root_dir):
|
| 83 |
for fname in files:
|
| 84 |
if fname.lower().endswith(extensions):
|
| 85 |
self.paths.append(os.path.join(root, fname))
|
| 86 |
|
|
|
|
| 87 |
if limit:
|
| 88 |
self.paths = self.paths[:limit]
|
| 89 |
|
|
|
|
| 90 |
print("Проверка изображений...")
|
| 91 |
valid = []
|
| 92 |
for p in tqdm(self.paths, desc="Проверка"):
|
|
|
|
| 101 |
if len(self.paths) == 0:
|
| 102 |
raise RuntimeError(f"Не найдено валидных изображений в {root_dir}")
|
| 103 |
|
|
|
|
| 104 |
random.shuffle(self.paths)
|
| 105 |
print(f"Найдено {len(self.paths)} изображений")
|
| 106 |
|
|
|
|
| 107 |
self.transform = Compose([
|
| 108 |
Resize(min_size, interpolation=Image.LANCZOS),
|
| 109 |
CenterCrop(crop_size),
|
|
|
|
| 126 |
def deprocess(x):
|
| 127 |
return x * 0.5 + 0.5
|
| 128 |
|
| 129 |
+
def _sanitize_name(name: str) -> str:
|
| 130 |
+
return name.replace('/', '_').replace('-', '_')
|
| 131 |
+
|
| 132 |
# --------------------------- Основной код ---------------------------
|
| 133 |
if __name__ == "__main__":
|
| 134 |
+
if NUM_SAMPLES_TO_SAVE > 0:
|
| 135 |
+
os.makedirs(SAMPLES_FOLDER, exist_ok=True)
|
| 136 |
+
|
| 137 |
dataset = ImageFolderDataset(
|
| 138 |
IMAGE_FOLDER,
|
| 139 |
extensions=('.png',),
|
|
|
|
| 145 |
dataloader = DataLoader(
|
| 146 |
dataset,
|
| 147 |
batch_size=BATCH_SIZE,
|
| 148 |
+
shuffle=False,
|
| 149 |
num_workers=NUM_WORKERS,
|
| 150 |
pin_memory=True,
|
| 151 |
drop_last=False
|
| 152 |
)
|
| 153 |
|
|
|
|
| 154 |
lpips_net = lpips.LPIPS(net="vgg").eval().to(DEVICE).requires_grad_(False)
|
| 155 |
|
|
|
|
| 156 |
print("\nЗагрузка VAE моделей...")
|
| 157 |
vaes = []
|
| 158 |
names = []
|
|
|
|
| 160 |
for name, vae_class, model_path, subfolder in VAE_LIST:
|
| 161 |
try:
|
| 162 |
print(f" Загружаю {name}...")
|
| 163 |
+
# Исправлена загрузка для variant
|
| 164 |
+
if "sdxs" in model_path:
|
| 165 |
+
vae = vae_class.from_pretrained(model_path, subfolder=subfolder, variant="fp16")
|
| 166 |
+
else:
|
| 167 |
+
vae = vae_class.from_pretrained(model_path, subfolder=subfolder)
|
| 168 |
vae = vae.to(DEVICE, DTYPE).eval()
|
| 169 |
vaes.append(vae)
|
| 170 |
names.append(name)
|
| 171 |
except Exception as e:
|
| 172 |
print(f" ❌ Ошибка загрузки {name}: {e}")
|
| 173 |
|
|
|
|
| 174 |
print("\nОценка метрик...")
|
| 175 |
+
results = {name: {"mse": 0.0, "psnr": 0.0, "lpips": 0.0, "edge": 0.0, "count": 0} for name in names}
|
| 176 |
+
|
| 177 |
+
to_pil = ToPILImage()
|
| 178 |
+
|
| 179 |
+
# >>>>>>>> ОСНОВНЫЕ ИЗМЕНЕНИЯ ЗДЕСЬ (KISS) <<<<<<<<
|
| 180 |
with torch.no_grad():
|
| 181 |
+
images_saved = 0 # считаем именно КОЛ-ВО ИЗОБРАЖЕНИЙ, а не сохранённых файлов
|
| 182 |
for batch in tqdm(dataloader, desc="Обработка батчей"):
|
| 183 |
+
batch = batch.to(DEVICE) # [B,3,H,W] в [0,1]
|
| 184 |
+
test_inp = process(batch).to(DTYPE) # [-1,1] для энкодера
|
| 185 |
+
|
| 186 |
+
# 1) считаем реконструкции для всех VAE на весь батч
|
| 187 |
+
recon_list = []
|
| 188 |
+
for vae in vaes:
|
| 189 |
latent = vae.encode(test_inp).latent_dist.mode()
|
| 190 |
+
dec = vae.decode(latent).sample.float() # [-1,1] (как правило)
|
| 191 |
+
recon = deprocess(dec).clamp(0.0, 1.0) # -> [0,1], clamp убирает артефакты
|
| 192 |
+
recon_list.append(recon)
|
| 193 |
+
|
| 194 |
+
# 2) обновляем метрики (по каждой VAE)
|
| 195 |
+
for recon, name in zip(recon_list, names):
|
| 196 |
for i in range(batch.shape[0]):
|
| 197 |
img_orig = batch[i:i+1]
|
| 198 |
img_recon = recon[i:i+1]
|
|
|
|
| 199 |
mse = F.mse_loss(img_orig, img_recon).item()
|
| 200 |
psnr = 10 * torch.log10(1 / torch.tensor(mse)).item()
|
| 201 |
lpips_val = lpips_net(img_orig, img_recon, normalize=True).mean().item()
|
| 202 |
+
edge_loss = compute_edge_loss(img_orig, img_recon)
|
| 203 |
results[name]["mse"] += mse
|
| 204 |
results[name]["psnr"] += psnr
|
| 205 |
results[name]["lpips"] += lpips_val
|
| 206 |
+
results[name]["edge"] += edge_loss
|
| 207 |
results[name]["count"] += 1
|
| 208 |
+
|
| 209 |
+
# 3) сохраняем ровно NUM_SAMPLES_TO_SAVE изображений (orig + все VAE + общий коллаж)
|
| 210 |
+
if NUM_SAMPLES_TO_SAVE > 0:
|
| 211 |
+
for i in range(batch.shape[0]):
|
| 212 |
+
if images_saved >= NUM_SAMPLES_TO_SAVE:
|
| 213 |
+
break
|
| 214 |
+
idx_str = f"{images_saved + 1:03d}"
|
| 215 |
+
|
| 216 |
+
# original
|
| 217 |
+
orig_pil = to_pil(batch[i].detach().float().cpu())
|
| 218 |
+
orig_pil.save(os.path.join(SAMPLES_FOLDER, f"{idx_str}_orig.png"))
|
| 219 |
+
|
| 220 |
+
# per-VAE decodes
|
| 221 |
+
tiles = [orig_pil]
|
| 222 |
+
for recon, name in zip(recon_list, names):
|
| 223 |
+
recon_pil = to_pil(recon[i].detach().cpu())
|
| 224 |
+
recon_pil.save(os.path.join(
|
| 225 |
+
SAMPLES_FOLDER, f"{idx_str}_decoded_{_sanitize_name(name)}.png"
|
| 226 |
+
))
|
| 227 |
+
tiles.append(recon_pil)
|
| 228 |
+
|
| 229 |
+
# общий коллаж: [orig | vae1 | vae2 | ...]
|
| 230 |
+
collage_w = CROP_SIZE * len(tiles)
|
| 231 |
+
collage_h = CROP_SIZE
|
| 232 |
+
collage = Image.new("RGB", (collage_w, collage_h))
|
| 233 |
+
x = 0
|
| 234 |
+
for tile in tiles:
|
| 235 |
+
collage.paste(tile, (x, 0))
|
| 236 |
+
x += CROP_SIZE
|
| 237 |
+
collage.save(os.path.join(SAMPLES_FOLDER, f"{idx_str}_all.png"))
|
| 238 |
+
|
| 239 |
+
images_saved += 1
|
| 240 |
+
|
| 241 |
+
|
| 242 |
# Усреднение результатов
|
| 243 |
for name in names:
|
| 244 |
count = results[name]["count"]
|
| 245 |
results[name]["mse"] /= count
|
| 246 |
results[name]["psnr"] /= count
|
| 247 |
results[name]["lpips"] /= count
|
| 248 |
+
results[name]["edge"] /= count
|
| 249 |
|
| 250 |
# Вывод абсолютных значений
|
| 251 |
print("\n=== Абсолютные значения ===")
|
| 252 |
for name in names:
|
| 253 |
+
print(f"{name:30s}: MSE: {results[name]['mse']:.3e}, PSNR: {results[name]['psnr']:.4f}, "
|
| 254 |
+
f"LPIPS: {results[name]['lpips']:.4f}, Edge: {results[name]['edge']:.4f}")
|
| 255 |
|
| 256 |
# Вывод таблицы с процентами
|
| 257 |
print("\n=== Сравнение с первой моделью (%) ===")
|
| 258 |
+
print(f"| {'Модель':30s} | {'MSE':>10s} | {'PSNR':>10s} | {'LPIPS':>10s} | {'Edge':>10s} |")
|
| 259 |
+
print(f"|{'-'*32}|{'-'*12}|{'-'*12}|{'-'*12}|{'-'*12}|")
|
| 260 |
|
| 261 |
baseline = names[0]
|
| 262 |
for name in names:
|
| 263 |
+
# Для MSE, LPIPS и Edge: меньше = лучше, поэтому инвертируем
|
| 264 |
mse_pct = (results[baseline]["mse"] / results[name]["mse"]) * 100
|
| 265 |
+
# Для PSNR: больше = лучше
|
| 266 |
psnr_pct = (results[name]["psnr"] / results[baseline]["psnr"]) * 100
|
| 267 |
+
# Для LPIPS и Edge: меньше = лучше
|
| 268 |
lpips_pct = (results[baseline]["lpips"] / results[name]["lpips"]) * 100
|
| 269 |
+
edge_pct = (results[baseline]["edge"] / results[name]["edge"]) * 100
|
| 270 |
|
| 271 |
if name == baseline:
|
| 272 |
+
print(f"| {name:30s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} | {'100%':>10s} |")
|
| 273 |
else:
|
| 274 |
+
print(f"| {name:30s} | {f'{mse_pct:.1f}%':>10s} | {f'{psnr_pct:.1f}%':>10s} | "
|
| 275 |
+
f"{f'{lpips_pct:.1f}%':>10s} | {f'{edge_pct:.1f}%':>10s} |")
|
| 276 |
|
| 277 |
+
print("\n✅ Готово!")
|
samples/sample_0.jpg
DELETED
Git LFS Details
|
samples/sample_1.jpg
DELETED
Git LFS Details
|
samples/sample_2.jpg
DELETED
Git LFS Details
|
samples/sample_decoded.jpg
DELETED
Git LFS Details
|
samples/sample_real.jpg
DELETED
Git LFS Details
|
train_sdxl_vae_gpt5.py → train_sdxl_vae.py
RENAMED
|
@@ -24,7 +24,7 @@ from collections import deque
|
|
| 24 |
|
| 25 |
# --------------------------- Параметры ---------------------------
|
| 26 |
ds_path = "/workspace/png"
|
| 27 |
-
project = "
|
| 28 |
batch_size = 3
|
| 29 |
base_learning_rate = 6e-6
|
| 30 |
min_learning_rate = 1e-6
|
|
@@ -50,7 +50,7 @@ clip_grad_norm = 1.0
|
|
| 50 |
mixed_precision = "no" # или "fp16"/"bf16" при поддержке
|
| 51 |
gradient_accumulation_steps = 5
|
| 52 |
generated_folder = "samples"
|
| 53 |
-
save_as = "
|
| 54 |
num_workers = 0
|
| 55 |
device = None # accelerator задаст устройство
|
| 56 |
|
|
@@ -65,7 +65,7 @@ loss_ratios = {
|
|
| 65 |
median_coeff_steps = 256 # за сколько шагов считать медианные коэффициенты
|
| 66 |
|
| 67 |
# --------------------------- параметры препроцессинга ---------------------------
|
| 68 |
-
resize_long_side = 1280 # если None или 0 — ресайза не будет; рекомендовано
|
| 69 |
|
| 70 |
Path(generated_folder).mkdir(parents=True, exist_ok=True)
|
| 71 |
|
|
|
|
| 24 |
|
| 25 |
# --------------------------- Параметры ---------------------------
|
| 26 |
ds_path = "/workspace/png"
|
| 27 |
+
project = "vae"
|
| 28 |
batch_size = 3
|
| 29 |
base_learning_rate = 6e-6
|
| 30 |
min_learning_rate = 1e-6
|
|
|
|
| 50 |
mixed_precision = "no" # или "fp16"/"bf16" при поддержке
|
| 51 |
gradient_accumulation_steps = 5
|
| 52 |
generated_folder = "samples"
|
| 53 |
+
save_as = "vae_nightly"
|
| 54 |
num_workers = 0
|
| 55 |
device = None # accelerator задаст устройство
|
| 56 |
|
|
|
|
| 65 |
median_coeff_steps = 256 # за сколько шагов считать медианные коэффициенты
|
| 66 |
|
| 67 |
# --------------------------- параметры препроцессинга ---------------------------
|
| 68 |
+
resize_long_side = 1280 # если None или 0 — ресайза не будет; рекомендовано 1280
|
| 69 |
|
| 70 |
Path(generated_folder).mkdir(parents=True, exist_ok=True)
|
| 71 |
|
vae/diffusion_pytorch_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 334643268
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03f2412467f6bedce9efeddba5860b5ec0d3267931d14c500d4bd7a878e14cbd
|
| 3 |
size 334643268
|