import torch from PIL import Image import torchvision.transforms as T import numpy as np import os from modeling_my_segformer import MySegformerForSemanticSegmentation from mix_vision_transformer_config import MySegformerConfig # Gerät auswählen device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(f"Using device: {device}") # Modell laden model_name_or_path = "TimM77/SegformerPlusPlus" print("Starte config_load") config = MySegformerConfig.from_pretrained(model_name_or_path) print("Starte Model_load") model = MySegformerForSemanticSegmentation.from_pretrained(model_name_or_path, config=config) model.to(device).eval() # Bild laden image_path = "segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png" image = Image.open(image_path).convert("RGB") target_image_height = 1024 target_image_width = 1024 img_tensor_temp = T.ToTensor()(image) mean = img_tensor_temp.mean(dim=(1, 2)).tolist() std = img_tensor_temp.std(dim=(1, 2)).tolist() print(f"Calculated Mean (for this image): {mean}") print(f"Calculated Std (for this image): {std}") transform = T.Compose([ T.Resize((target_image_height, target_image_width)), # Resize to 1024x1024 T.ToTensor(), T.Normalize(mean=mean, std=std) # Use dynamically calculated mean/std ]) input_tensor = transform(image).unsqueeze(0).to(device) print("Modell geladen, Bild geladen, Preprocessing abgeschlossen") # Inferenz with torch.no_grad(): output = model(input_tensor) logits = output.logits if hasattr(output, "logits") else output pred = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy() # Ergebnis als Textdatei speichern output_path = os.path.join("segformer_plusplus", "cityscapes_prediction_output_overHF.txt") np.savetxt(output_path, pred, fmt="%d") print(f"Prediction saved as {output_path}")