Anpassungen für HF

Files changed (3) hide show

modeling_my_segformer.py CHANGED Viewed

@@ -120,3 +120,12 @@ class MySegformerForSemanticSegmentation(PreTrainedModel):
         )
         self.post_init()

         )
         self.post_init()
+    def forward(self, x):
+        # The backbone returns a list of features (multi-scale features)
+        features = self.backbone(x)  # e.g. List[Tensor]
+        # Hand the features over to the segmentation head
+        output = self.segmentation_head(features)  # Tensor: logits or segmentation masks
+        return output  # the head's result is returned as-is, without wrapping it in an output object

preTrainedTest.py CHANGED Viewed

@@ -1,13 +1,42 @@
 from modeling_my_segformer import MySegformerForSemanticSegmentation
 from mix_vision_transformer_config import MySegformerConfig
-# Path to your HF repo (can also simply be given as a plain string)
-model_name_or_path = "TimM77/SegformerPlusPlus"
-# Load the config (automatically from config.json in the repo)
 config = MySegformerConfig.from_pretrained(model_name_or_path)
-# Load the model (weights from pytorch_model.bin + config)
 model = MySegformerForSemanticSegmentation.from_pretrained(model_name_or_path, config=config)
-print(model, config)

+import torch
+from PIL import Image
+import torchvision.transforms as T
+import numpy as np
+import os
 from modeling_my_segformer import MySegformerForSemanticSegmentation
 from mix_vision_transformer_config import MySegformerConfig
+# Select the device: CUDA when available, otherwise CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Using device: {device}")
+# Load the model
+model_name_or_path = "TimM77/SegformerPlusPlus"
 config = MySegformerConfig.from_pretrained(model_name_or_path)
 model = MySegformerForSemanticSegmentation.from_pretrained(model_name_or_path, config=config)
+model.to(device).eval()  # move the model to the chosen device and switch it to eval mode
+# Load the image and force three RGB channels
+image_path = "segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png"
+image = Image.open(image_path).convert("RGB")
+# Preprocessing: fixed 512x512 resize, tensor conversion, per-channel normalization
+transform = T.Compose([
+    T.Resize((512, 512)),
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406],  # NOTE(review): looks like the usual ImageNet statistics - confirm they match training
+                std=[0.229, 0.224, 0.225])
+])
+input_tensor = transform(image).unsqueeze(0).to(device)  # unsqueeze(0) adds a leading dimension of size 1
+# Inference (no gradient tracking)
+with torch.no_grad():
+    output = model(input_tensor)
+    logits = output.logits if hasattr(output, "logits") else output  # accept either an object exposing .logits or the bare output
+    pred = torch.argmax(logits, dim=1).squeeze(0).cpu().numpy()  # presumably dim 1 is the class axis - TODO confirm
+# Save the result as a text file
+output_path = os.path.join("segformer_plusplus", "cityscapes_prediction_output_overHF.txt")
+np.savetxt(output_path, pred, fmt="%d")  # written with integer formatting
+print(f"Prediction saved as {output_path}")

segformer_plusplus/model/backbone/mit.py CHANGED Viewed

@@ -415,7 +415,7 @@ class MixVisionTransformer(BaseModule):
         cur = 0
         self.layers = ModuleList()
         for i, num_layer in enumerate(num_layers):
-            embed_dims_i = embed_dims[i]
             patch_embed = PatchEmbed(
                 in_channels=in_channels,
                 embed_dims=embed_dims_i,

         cur = 0
         self.layers = ModuleList()
         for i, num_layer in enumerate(num_layers):
+            embed_dims_i = embed_dims * num_heads[i]
             patch_embed = PatchEmbed(
                 in_channels=in_channels,
                 embed_dims=embed_dims_i,