syscv-community
/

sam-hq-vit-base

@@ -108,123 +108,134 @@ plt.show()
 ## Complete Example with Visualization
-Here's a complete example showing how to use SAM-HQ with the image embedding workflow and how to visualize the results:
 ```python
-import torch
 import numpy as np
 import matplotlib.pyplot as plt
-from PIL import Image
-import requests
 from transformers import SamHQModel, SamHQProcessor
-# 1. Load model and processor
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = SamHQModel.from_pretrained("sushmanth/sam_hq_vit_b").to(device)
 processor = SamHQProcessor.from_pretrained("sushmanth/sam_hq_vit_b")
-# 2. Load and display image
 img_url = "https://raw.githubusercontent.com/SysCV/sam-hq/refs/heads/main/demo/input_imgs/example1.png"
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")
-plt.figure(figsize=(10, 10))
 plt.imshow(raw_image)
-plt.axis('off')
-plt.show()
-# 3. Compute image embeddings
 inputs = processor(raw_image, return_tensors="pt").to(device)
 image_embeddings, intermediate_embeddings = model.get_image_embeddings(inputs["pixel_values"])
-# 4. Define bounding box and visualize it
-input_boxes = [[[306, 132, 925, 893]]]  # Define bounding box [x1, y1, x2, y2]
-# Helper function to display bounding box
-def show_box(box, ax):
-    x0, y0 = box[0], box[1]
-    w, h = box[2] - box[0], box[3] - box[1]
-    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
-plt.figure(figsize=(10, 10))
-plt.imshow(raw_image)
-for box in input_boxes[0]:
-    show_box(box, plt.gca())
-plt.axis('on')
-plt.title("Input Image with Bounding Box")
-plt.show()
-# 5. Run inference with the bounding box
-# First update the inputs with the image embeddings
 inputs.pop("pixel_values", None)
 inputs.update({"image_embeddings": image_embeddings})
 inputs.update({"intermediate_embeddings": intermediate_embeddings})
-inputs.update({"input_boxes": torch.tensor(input_boxes).to(device)})
-# Run inference
 with torch.no_grad():
     outputs = model(**inputs)
-# 6. Post-process the masks
-masks = processor.image_processor.post_process_masks(
-    outputs.pred_masks.cpu(),
-    inputs["original_sizes"].cpu(),
-    inputs["reshaped_input_sizes"].cpu()
-)
 scores = outputs.iou_scores
-# 7. Visualize results
-# Helper function to show masks
-def show_mask(mask, ax, random_color=False):
-    if random_color:
-        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
-    else:
-        color = np.array([30/255, 144/255, 255/255, 0.6])
-    h, w = mask.shape[-2:]
-    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
-    ax.imshow(mask_image)
-# Show all masks with scores
-if len(masks[0].shape) == 4:
-    masks_to_show = masks[0].squeeze()
-else:
-    masks_to_show = masks[0]
-if scores.shape[0] == 1:
-    scores_to_show = scores.squeeze()
-else:
-    scores_to_show = scores
-# Create a figure with subplots for each mask
-nb_predictions = scores_to_show.shape[-1]
-fig, axes = plt.subplots(1, nb_predictions, figsize=(15, 15))
-# Handle the case where there's only one mask
-if nb_predictions == 1:
-    axes = [axes]
-for i, (mask, score) in enumerate(zip(masks_to_show, scores_to_show)):
-    mask = mask.cpu().detach()
-    axes[i].imshow(np.array(raw_image))
-    show_mask(mask, axes[i])
-    axes[i].title.set_text(f"Mask {i+1}, Score: {score.item():.3f}")
-    axes[i].axis("off")
-plt.tight_layout()
-plt.show()
-# Show all masks overlaid on a single image
-fig, ax = plt.subplots(figsize=(10, 10))
-ax.imshow(np.array(raw_image))
-for i, (mask, score) in enumerate(zip(masks_to_show, scores_to_show)):
-    if len(mask.shape) > 2:
-        mask = mask.squeeze()
-    show_mask(mask, ax, random_color=True)
-ax.set_title("All Masks Overlaid")
-ax.axis("off")
-plt.tight_layout()
-plt.show()
 ```
-This example demonstrates the complete workflow of using SAM-HQ with the "sushmanth/sam_hq_vit_b" model. It computes image embeddings once and then uses them for inference with a bounding box prompt. The resulting masks are visualized both individually with their confidence scores and overlaid on a single image with different colors.
 # Citation
 ```

 ## Complete Example with Visualization
 ```python
 import numpy as np
 import matplotlib.pyplot as plt
+def show_mask(mask, ax, random_color=False):
+    if random_color:
+        color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
+    else:
+        color = np.array([30/255, 144/255, 255/255, 0.6])
+    h, w = mask.shape[-2:]
+    mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+    ax.imshow(mask_image)
+def show_box(box, ax):
+    x0, y0 = box[0], box[1]
+    w, h = box[2] - box[0], box[3] - box[1]
+    ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))
+def show_boxes_on_image(raw_image, boxes):
+    plt.figure(figsize=(10,10))
+    plt.imshow(raw_image)
+    for box in boxes:
+      show_box(box, plt.gca())
+    plt.axis('on')
+    plt.show()
+def show_points_on_image(raw_image, input_points, input_labels=None):
+    plt.figure(figsize=(10,10))
+    plt.imshow(raw_image)
+    input_points = np.array(input_points)
+    if input_labels is None:
+      labels = np.ones_like(input_points[:, 0])
+    else:
+      labels = np.array(input_labels)
+    show_points(input_points, labels, plt.gca())
+    plt.axis('on')
+    plt.show()
+def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
+    plt.figure(figsize=(10,10))
+    plt.imshow(raw_image)
+    input_points = np.array(input_points)
+    if input_labels is None:
+      labels = np.ones_like(input_points[:, 0])
+    else:
+      labels = np.array(input_labels)
+    show_points(input_points, labels, plt.gca())
+    for box in boxes:
+      show_box(box, plt.gca())
+    plt.axis('on')
+    plt.show()
+def show_points_and_boxes_on_image(raw_image, boxes, input_points, input_labels=None):
+    plt.figure(figsize=(10,10))
+    plt.imshow(raw_image)
+    input_points = np.array(input_points)
+    if input_labels is None:
+      labels = np.ones_like(input_points[:, 0])
+    else:
+      labels = np.array(input_labels)
+    show_points(input_points, labels, plt.gca())
+    for box in boxes:
+      show_box(box, plt.gca())
+    plt.axis('on')
+    plt.show()
+def show_points(coords, labels, ax, marker_size=375):
+    pos_points = coords[labels==1]
+    neg_points = coords[labels==0]
+    ax.scatter(pos_points[:, 0], pos_points[:, 1], color='green', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
+    ax.scatter(neg_points[:, 0], neg_points[:, 1], color='red', marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
+def show_masks_on_image(raw_image, masks, scores):
+    if len(masks.shape) == 4:
+      masks = masks.squeeze()
+    if scores.shape[0] == 1:
+      scores = scores.squeeze()
+    nb_predictions = scores.shape[-1]
+    fig, axes = plt.subplots(1, nb_predictions, figsize=(15, 15))
+    for i, (mask, score) in enumerate(zip(masks, scores)):
+      mask = mask.cpu().detach()
+      axes[i].imshow(np.array(raw_image))
+      show_mask(mask, axes[i])
+      axes[i].title.set_text(f"Mask {i+1}, Score: {score.item():.3f}")
+      axes[i].axis("off")
+    plt.show()
+def show_masks_on_single_image(raw_image, masks, scores):
+    if len(masks.shape) == 4:
+        masks = masks.squeeze()
+    if scores.shape[0] == 1:
+        scores = scores.squeeze()
+    # Convert image to numpy array if it's not already
+    image_np = np.array(raw_image)
+    # Create a figure
+    fig, ax = plt.subplots(figsize=(8, 8))
+    ax.imshow(image_np)
+    # Overlay all masks on the same image
+    for i, (mask, score) in enumerate(zip(masks, scores)):
+        mask = mask.cpu().detach().numpy()  # Convert to NumPy
+        show_mask(mask, ax)  # Assuming `show_mask` properly overlays the mask
+    ax.set_title(f"Overlayed Masks with Scores")
+    ax.axis("off")
+    plt.show()
+import torch
 from transformers import SamHQModel, SamHQProcessor
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model = SamHQModel.from_pretrained("sushmanth/sam_hq_vit_b").to(device)
 processor = SamHQProcessor.from_pretrained("sushmanth/sam_hq_vit_b")
+from PIL import Image
+import requests
 img_url = "https://raw.githubusercontent.com/SysCV/sam-hq/refs/heads/main/demo/input_imgs/example1.png"
 raw_image = Image.open(requests.get(img_url, stream=True).raw).convert("RGB")
 plt.imshow(raw_image)
 inputs = processor(raw_image, return_tensors="pt").to(device)
 image_embeddings, intermediate_embeddings = model.get_image_embeddings(inputs["pixel_values"])
+input_boxes = [[[306, 132, 925, 893]]]
+show_boxes_on_image(raw_image, input_boxes[0])
 inputs.pop("pixel_values", None)
 inputs.update({"image_embeddings": image_embeddings})
 inputs.update({"intermediate_embeddings": intermediate_embeddings})
 with torch.no_grad():
     outputs = model(**inputs)
+masks = processor.image_processor.post_process_masks(outputs.pred_masks.cpu(), inputs["original_sizes"].cpu(), inputs["reshaped_input_sizes"].cpu())
 scores = outputs.iou_scores
+show_masks_on_single_image(raw_image, masks[0], scores)
+show_masks_on_image(raw_image, masks[0], scores)
 ```
 # Citation
 ```