matthewyuan
/

image-quality-fusion

@@ -48,40 +48,83 @@ pip install torch torchvision huggingface_hub opencv-python pillow open-clip-tor
 ### Basic Usage
 ```python
 from huggingface_hub import PyTorchModelHubMixin
-from PIL import Image
-# Load the model
-model = PyTorchModelHubMixin.from_pretrained("matthewyuan/image-quality-fusion")
-# Predict quality for a single image
-quality_score = model.predict_quality("path/to/your/image.jpg")
-print(f"Image quality: {quality_score:.2f}/10")
-# Batch prediction
-image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]
-scores = model.predict_batch(image_paths)
-for path, score in zip(image_paths, scores):
-    print(f"{path}: {score:.2f}/10")
 ```
 ### Advanced Usage
 ```python
-# Load with PIL Image
 from PIL import Image
-image = Image.open("photo.jpg")
-score = model.predict_quality(image)
-# Works with different input formats
-import numpy as np
-image_array = np.array(image)
-score = model.predict_quality(image_array)
-# Get model information
-info = model.get_model_info()
-print(f"Model: {info['name']} v{info['version']}")
-print(f"Performance: Correlation = {info['performance']['correlation']}")
 ```
 ## 📊 Performance Metrics

 ### Basic Usage
 ```python
+# Define a minimal loader class that matches the uploaded head (512 -> 256 -> 1)
+import torch
+import torch.nn as nn
 from huggingface_hub import PyTorchModelHubMixin
+class IQFModel(nn.Module, PyTorchModelHubMixin):
+    def __init__(self, in_dim=512, hidden=256, **kwargs):
+        # Accept either in_dim/hidden or clip_embed_dim/hidden_dim from config.json
+        in_dim = kwargs.pop("clip_embed_dim", in_dim)
+        hidden = kwargs.pop("hidden_dim", hidden)
+        super().__init__()
+        self.mlp = nn.Sequential(
+            nn.Linear(in_dim, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, 1),
+        )
+    def forward(self, x):
+        return self.mlp(x)
+# Load weights from the Hub (defaults to model.safetensors)
+model = IQFModel.from_pretrained("matthewyuan/image-quality-fusion", map_location="cpu")
+model.eval()
+# Smoke test on a dummy 512-d vector
+with torch.no_grad():
+    y = model(torch.randn(1, 512)).item()
+print(f"score: {y}")
 ```
 ### Advanced Usage
 ```python
+import torch
+import torch.nn as nn
 from PIL import Image
+import open_clip
+from huggingface_hub import PyTorchModelHubMixin
+# Minimal loader class (same as above)
+class IQFModel(nn.Module, PyTorchModelHubMixin):
+    def __init__(self, in_dim=512, hidden=256, **kwargs):
+        in_dim = kwargs.pop("clip_embed_dim", in_dim)
+        hidden = kwargs.pop("hidden_dim", hidden)
+        super().__init__()
+        self.mlp = nn.Sequential(
+            nn.Linear(in_dim, hidden),
+            nn.ReLU(),
+            nn.Linear(hidden, 1),
+        )
+    def forward(self, x):
+        return self.mlp(x)
+# 1) Load CLIP ViT-B/32 image encoder (512-d output)
+clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
+    "ViT-B-32", pretrained="openai"
+)
+clip_model.eval()
+# 2) Load the fusion head from the Hub
+fusion = IQFModel.from_pretrained("matthewyuan/image-quality-fusion", map_location="cpu")
+fusion.eval()
+def image_to_clip_embedding(img: Image.Image) -> torch.Tensor:
+    x = clip_preprocess(img).unsqueeze(0)  # [1, 3, H, W]
+    with torch.no_grad():
+        feat = clip_model.encode_image(x)   # [1, 512]
+        feat = feat / feat.norm(dim=-1, keepdim=True)
+    return feat
+def predict_quality(image_path: str) -> float:
+    img = Image.open(image_path).convert("RGB")
+    emb = image_to_clip_embedding(img)      # [1, 512]
+    with torch.no_grad():
+        score = fusion(emb).item()          # scalar
+    return float(score)
+print("score:", predict_quality("test.jpg"))  # demo call on the file test.jpg; substitute your own image path
 ```
 ## 📊 Performance Metrics