matthewyuan committed on
Commit
93cc485
·
verified ·
1 Parent(s): e11ef26

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +69 -26
README.md CHANGED
@@ -48,40 +48,83 @@ pip install torch torchvision huggingface_hub opencv-python pillow open-clip-tor
48
  ### Basic Usage
49
 
50
  ```python
 
 
 
51
  from huggingface_hub import PyTorchModelHubMixin
52
- from PIL import Image
53
-
54
- # Load the model
55
- model = PyTorchModelHubMixin.from_pretrained("matthewyuan/image-quality-fusion")
56
-
57
- # Predict quality for a single image
58
- quality_score = model.predict_quality("path/to/your/image.jpg")
59
- print(f"Image quality: {quality_score:.2f}/10")
60
 
61
- # Batch prediction
62
- image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]
63
- scores = model.predict_batch(image_paths)
64
- for path, score in zip(image_paths, scores):
65
- print(f"{path}: {score:.2f}/10")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  ```
67
 
68
  ### Advanced Usage
69
 
70
  ```python
71
- # Load with PIL Image
 
72
  from PIL import Image
73
- image = Image.open("photo.jpg")
74
- score = model.predict_quality(image)
75
-
76
- # Works with different input formats
77
- import numpy as np
78
- image_array = np.array(image)
79
- score = model.predict_quality(image_array)
80
-
81
- # Get model information
82
- info = model.get_model_info()
83
- print(f"Model: {info['name']} v{info['version']}")
84
- print(f"Performance: Correlation = {info['performance']['correlation']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  ```
86
 
87
  ## 📊 Performance Metrics
 
48
  ### Basic Usage
49
 
50
  ```python
51
+ # Define a minimal loader class that matches the uploaded head (512 -> 256 -> 1)
52
+ import torch
53
+ import torch.nn as nn
54
  from huggingface_hub import PyTorchModelHubMixin
 
 
 
 
 
 
 
 
55
 
56
+ class IQFModel(nn.Module, PyTorchModelHubMixin):
57
+ def __init__(self, in_dim=512, hidden=256, **kwargs):
58
+ # Accept either in_dim/hidden or clip_embed_dim/hidden_dim from config.json
59
+ in_dim = kwargs.pop("clip_embed_dim", in_dim)
60
+ hidden = kwargs.pop("hidden_dim", hidden)
61
+ super().__init__()
62
+ self.mlp = nn.Sequential(
63
+ nn.Linear(in_dim, hidden),
64
+ nn.ReLU(),
65
+ nn.Linear(hidden, 1),
66
+ )
67
+ def forward(self, x):
68
+ return self.mlp(x)
69
+
70
+ # Load weights from the Hub (defaults to model.safetensors)
71
+ model = IQFModel.from_pretrained("matthewyuan/image-quality-fusion", map_location="cpu")
72
+ model.eval()
73
+
74
+ # Smoke test on a dummy 512-d vector
75
+ with torch.no_grad():
76
+ y = model(torch.randn(1, 512)).item()
77
+ print(f"score: {y}")
78
  ```
79
 
80
  ### Advanced Usage
81
 
82
  ```python
83
+ import torch
84
+ import torch.nn as nn
85
  from PIL import Image
86
+ import open_clip
87
+ from huggingface_hub import PyTorchModelHubMixin
88
+
89
+ # Minimal loader class (same as above)
90
+ class IQFModel(nn.Module, PyTorchModelHubMixin):
91
+ def __init__(self, in_dim=512, hidden=256, **kwargs):
92
+ in_dim = kwargs.pop("clip_embed_dim", in_dim)
93
+ hidden = kwargs.pop("hidden_dim", hidden)
94
+ super().__init__()
95
+ self.mlp = nn.Sequential(
96
+ nn.Linear(in_dim, hidden),
97
+ nn.ReLU(),
98
+ nn.Linear(hidden, 1),
99
+ )
100
+ def forward(self, x):
101
+ return self.mlp(x)
102
+
103
+ # 1) Load CLIP ViT-B/32 image encoder (512-d output)
104
+ clip_model, _, clip_preprocess = open_clip.create_model_and_transforms(
105
+ "ViT-B-32", pretrained="openai"
106
+ )
107
+ clip_model.eval()
108
+
109
+ # 2) Load the fusion head from the Hub
110
+ fusion = IQFModel.from_pretrained("matthewyuan/image-quality-fusion", map_location="cpu")
111
+ fusion.eval()
112
+
113
+ def image_to_clip_embedding(img: Image.Image) -> torch.Tensor:
114
+ x = clip_preprocess(img).unsqueeze(0) # [1, 3, H, W]
115
+ with torch.no_grad():
116
+ feat = clip_model.encode_image(x) # [1, 512]
117
+ feat = feat / feat.norm(dim=-1, keepdim=True)
118
+ return feat
119
+
120
+ def predict_quality(image_path: str) -> float:
121
+ img = Image.open(image_path).convert("RGB")
122
+ emb = image_to_clip_embedding(img) # [1, 512]
123
+ with torch.no_grad():
124
+ score = fusion(emb).item() # scalar
125
+ return float(score)
126
+
127
+ print("score:", predict_quality("test.jpg"))
128
  ```
129
 
130
  ## 📊 Performance Metrics