Tim77777767
committed on
Commit
·
b7716fe
1
Parent(s):
762b1e6
Anpassungen preTrained
Browse files
- preTrainedTest.py +0 -9
preTrainedTest.py
CHANGED
@@ -23,15 +23,9 @@ model.to(device).eval()
|
|
23 |
image_path = "segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png"
|
24 |
image = Image.open(image_path).convert("RGB")
|
25 |
|
26 |
-
# --- Start of changes to match cityscape_benchmark ---
|
27 |
-
# Define the target image size as in cityscape_benchmark's default
|
28 |
-
# cityscape_benchmark uses (3, 1024, 1024), so spatial size is 1024x1024
|
29 |
target_image_height = 1024
|
30 |
target_image_width = 1024
|
31 |
|
32 |
-
# Calculate mean and std dynamically from the image as done in cityscape_benchmark
|
33 |
-
# Note: This is usually done over the entire training dataset for consistent normalization
|
34 |
-
# For a single image, this just normalizes to its own mean/std.
|
35 |
img_tensor_temp = T.ToTensor()(image)
|
36 |
mean = img_tensor_temp.mean(dim=(1, 2)).tolist()
|
37 |
std = img_tensor_temp.std(dim=(1, 2)).tolist()
|
@@ -39,21 +33,18 @@ std = img_tensor_temp.std(dim=(1, 2)).tolist()
|
|
39 |
print(f"Calculated Mean (for this image): {mean}")
|
40 |
print(f"Calculated Std (for this image): {std}")
|
41 |
|
42 |
-
# Preprocessing - Adjusted to match cityscape_benchmark's T.Resize and T.Normalize
|
43 |
transform = T.Compose([
|
44 |
T.Resize((target_image_height, target_image_width)), # Resize to 1024x1024
|
45 |
T.ToTensor(),
|
46 |
T.Normalize(mean=mean, std=std) # Use dynamically calculated mean/std
|
47 |
])
|
48 |
input_tensor = transform(image).unsqueeze(0).to(device)
|
49 |
-
# --- End of changes ---
|
50 |
|
51 |
print("Modell geladen, Bild geladen, Preprocessing abgeschlossen")
|
52 |
|
53 |
# Inferenz
|
54 |
with torch.no_grad():
|
55 |
output = model(input_tensor)
|
56 |
-
# This ensures you're always getting the raw logits if the model returns an object
|
57 |
logits = output.logits if hasattr(output, "logits") else output
|
58 |
pred = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()
|
59 |
|
|
|
# Load the test image and force three channels (drops alpha if present).
# NOTE(review): relies on `Image` (PIL), `T` (torchvision.transforms),
# `model` and `device` being set up earlier in the file — confirm against
# the top of preTrainedTest.py.
image_path = "segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png"
image = Image.open(image_path).convert("RGB")

# Spatial size expected by the model input.
target_image_height = 1024
target_image_width = 1024

# Per-image channel statistics. NOTE(review): normalization constants are
# normally computed over the whole training set; here each channel of this
# single image is normalized to its own mean/std.
img_tensor_temp = T.ToTensor()(image)
mean = img_tensor_temp.mean(dim=(1, 2)).tolist()
std = img_tensor_temp.std(dim=(1, 2)).tolist()

print(f"Calculated Mean (for this image): {mean}")
print(f"Calculated Std (for this image): {std}")

# Preprocessing pipeline: resize -> tensor -> per-image normalization.
preprocessing_steps = [
    T.Resize((target_image_height, target_image_width)),
    T.ToTensor(),
    T.Normalize(mean=mean, std=std),
]
transform = T.Compose(preprocessing_steps)
input_tensor = transform(image).unsqueeze(0).to(device)

print("Modell geladen, Bild geladen, Preprocessing abgeschlossen")

# Inference — no autograd bookkeeping needed for a forward-only pass.
with torch.no_grad():
    output = model(input_tensor)

# Some model wrappers return an output object; unwrap to the raw logits.
if hasattr(output, "logits"):
    logits = output.logits
else:
    logits = output

# Per-pixel class prediction as a (H, W) numpy array.
pred = logits.argmax(dim=1).squeeze(0).cpu().numpy()