Tim77777767 committed
Commit 1a260cd · 1 Parent(s): 02508fb

Adjustments for HF, checkpoint converted, config.json adapted

config.json CHANGED
@@ -1,16 +1,57 @@
 {
+  "architectures": [
+    "MySegformerForSemanticSegmentation"
+  ],
+  "attn_drop_rate": 0.0,
+  "drop_path_rate": 0.0,
+  "drop_rate": 0.0,
+  "embed_dims": [
+    64,
+    128,
+    320,
+    512
+  ],
+  "mlp_ratio": 4,
   "model_type": "my_segformer",
-  "embed_dims": 64,
+  "num_classes": 19,
+  "num_heads": [
+    1,
+    2,
+    4,
+    8
+  ],
+  "num_layers": [
+    3,
+    4,
+    6,
+    3
+  ],
   "num_stages": 4,
-  "num_layers": [3,4,6,3],
-  "num_heads": [1,2,4,8],
-  "patch_sizes": [7,3,3,3],
-  "strides": [4,2,2,2],
-  "sr_ratios": [8,4,2,1],
-  "mlp_ratio": 4,
+  "out_indices": [
+    0,
+    1,
+    2,
+    3
+  ],
+  "patch_sizes": [
+    7,
+    3,
+    3,
+    3
+  ],
   "qkv_bias": true,
-  "drop_rate": 0.0,
-  "attn_drop_rate": 0.0,
-  "drop_path_rate": 0.0,
-  "out_indices": [0,1,2,3]
+  "sr_ratios": [
+    8,
+    4,
+    2,
+    1
+  ],
+  "strides": [
+    4,
+    2,
+    2,
+    2
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.0"
 }
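The substantive change here is `embed_dims`: a single int (64) before, one entry per stage now. A minimal sanity check for the rewritten file (a sketch; it assumes the updated config.json sits in the working directory):

import json

with open("config.json") as f:
    cfg = json.load(f)

# embed_dims is now a per-stage list instead of a single int
assert cfg["embed_dims"] == [64, 128, 320, 512]
assert len(cfg["embed_dims"]) == cfg["num_stages"] == len(cfg["num_layers"])
print("config OK")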
hf_segformer_converted/config.json ADDED
@@ -0,0 +1,57 @@
+{
+  "architectures": [
+    "MySegformerForSemanticSegmentation"
+  ],
+  "attn_drop_rate": 0.0,
+  "drop_path_rate": 0.0,
+  "drop_rate": 0.0,
+  "embed_dims": [
+    64,
+    128,
+    320,
+    512
+  ],
+  "mlp_ratio": 4,
+  "model_type": "my_segformer",
+  "num_classes": 19,
+  "num_heads": [
+    1,
+    2,
+    4,
+    8
+  ],
+  "num_layers": [
+    3,
+    4,
+    6,
+    3
+  ],
+  "num_stages": 4,
+  "out_indices": [
+    0,
+    1,
+    2,
+    3
+  ],
+  "patch_sizes": [
+    7,
+    3,
+    3,
+    3
+  ],
+  "qkv_bias": true,
+  "sr_ratios": [
+    8,
+    4,
+    2,
+    1
+  ],
+  "strides": [
+    4,
+    2,
+    2,
+    2
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.0"
+}
mix_vision_transformer_config.py CHANGED
@@ -5,7 +5,7 @@ class MySegformerConfig(PretrainedConfig):
 
     def __init__(
         self,
-        embed_dims=64,
+        embed_dims=[64, 128, 320, 512],
        num_stages=4,
        num_layers=[3, 4, 6, 3],
        num_heads=[1, 2, 4, 8],
@@ -21,6 +21,11 @@ class MySegformerConfig(PretrainedConfig):
        **kwargs
    ):
        super().__init__(**kwargs)
+
+        # Safety guard in case embed_dims is passed as an int
+        if isinstance(embed_dims, int):
+            embed_dims = [embed_dims]
+
        self.embed_dims = embed_dims
        self.num_stages = num_stages
        self.num_layers = num_layers
@@ -34,3 +39,4 @@ class MySegformerConfig(PretrainedConfig):
        self.attn_drop_rate = attn_drop_rate
        self.drop_path_rate = drop_path_rate
        self.out_indices = out_indices
+
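The new guard keeps legacy configs loadable: an int collapses to a one-element list instead of crashing later in the backbone. A quick usage sketch (hypothetical, run from the repo root):

from mix_vision_transformer_config import MySegformerConfig

cfg = MySegformerConfig()                   # new default
print(cfg.embed_dims)                       # [64, 128, 320, 512]

legacy = MySegformerConfig(embed_dims=64)   # old-style int is still accepted
print(legacy.embed_dims)                    # [64]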
mmengineToHFCheckpoint.py ADDED
@@ -0,0 +1,79 @@
+import torch
+from mix_vision_transformer_config import MySegformerConfig
+from modeling_my_segformer import MySegformerForSemanticSegmentation
+
+def convert_mmengine_checkpoint_to_hf(mm_checkpoint_path, hf_save_dir):
+    # 1. Load the mmengine checkpoint
+    mm_ckpt = torch.load(mm_checkpoint_path, map_location="cpu")
+    if 'state_dict' in mm_ckpt:
+        mm_state_dict = mm_ckpt['state_dict']
+    else:
+        mm_state_dict = mm_ckpt
+
+    # 2. Build config & model (make sure the config parameters match the checkpoint)
+    config = MySegformerConfig(
+        embed_dims=[64, 128, 320, 512],  # <--- correct list with 4 values
+        num_stages=4,
+        num_layers=[3, 4, 6, 3],
+        num_heads=[1, 2, 4, 8],
+        patch_sizes=[7, 3, 3, 3],
+        strides=[4, 2, 2, 2],
+        sr_ratios=[8, 4, 2, 1],
+        mlp_ratio=4,
+        qkv_bias=True,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.0,
+        out_indices=(0, 1, 2, 3),
+        num_classes=19
+    )
+
+    model = MySegformerForSemanticSegmentation(config)
+
+    # 3. Map mmengine keys onto HF keys
+    hf_state_dict = {}
+
+    for k, v in mm_state_dict.items():
+        new_k = k
+
+        # Strip a leading "module." prefix (DataParallel), if present
+        if new_k.startswith("module."):
+            new_k = new_k[len("module."):]
+
+        # Map decode_head.* -> segmentation_head.*
+        if new_k.startswith("decode_head."):
+            new_k = new_k.replace("decode_head.", "segmentation_head.")
+
+        # Unify BatchNorm names
+        new_k = new_k.replace(".bn.", ".")
+
+        # Only take over keys that exist in the HF model
+        if new_k not in model.state_dict():
+            print(f"⚠️ Ignoring {new_k} (not in the HF model)")
+            continue
+
+        hf_state_dict[new_k] = v
+
+    # 4. Load the weights into the model
+    missing_keys, unexpected_keys = model.load_state_dict(hf_state_dict, strict=False)
+
+    print("Missing keys:", missing_keys)
+    print("Unexpected keys:", unexpected_keys)
+
+    # 5. Save the HF-compatible model & config
+    model.save_pretrained(hf_save_dir)
+    config.save_pretrained(hf_save_dir)
+
+    print(f"✅ Model and config successfully saved to {hf_save_dir}")
+
+    # 5b. Also save as a classic .pth file
+    pth_path = hf_save_dir.rstrip("/") + ".pth"
+    torch.save(model.state_dict(), pth_path)
+    print(f"✅ Additionally saved as .pth at {pth_path}")
+
+
+if __name__ == "__main__":
+    mm_checkpoint_path = "./segformer-b5-bsm_hq.pth"
+    hf_save_dir = "hf_segformer_converted"
+
+    convert_mmengine_checkpoint_to_hf(mm_checkpoint_path, hf_save_dir)
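A round-trip check after running the converter can catch silent key mismatches that strict=False would otherwise swallow. The sketch below rests on an assumption: it presumes MySegformerForSemanticSegmentation declares MySegformerConfig as its config_class, so from_pretrained can rebuild the model from the saved directory.

import torch
from modeling_my_segformer import MySegformerForSemanticSegmentation

# Reload the directory written by the converter and compare it against the
# extra .pth snapshot the script also writes.
reloaded = MySegformerForSemanticSegmentation.from_pretrained("hf_segformer_converted")
sd = reloaded.state_dict()
reference = torch.load("hf_segformer_converted.pth", map_location="cpu")

mismatched = [k for k, v in reference.items() if not torch.equal(sd[k], v)]
print("mismatched tensors:", mismatched or "none")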
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:818e1b51093355a5915fd844e68edc7af583b2a397272c84e6e12a670896bc23
+size 98934820
modeling_my_segformer.py CHANGED
@@ -2,17 +2,17 @@ from transformers import PreTrainedModel
 import torch
 import torch.nn as nn
 from segformer_plusplus.utils import resize
-from segformer_plusplus.model.backbone.mit import MixVisionTransformer  # your backbone import
-from mix_vision_transformer_config import MySegformerConfig  # your config import
+from segformer_plusplus.model.backbone.mit import MixVisionTransformer  # backbone import
+from mix_vision_transformer_config import MySegformerConfig  # config import
 
-# Head implementation (somewhat simplified and adapted)
+# Head implementation (simplified)
 class SegformerHead(nn.Module):
     def __init__(self,
                  in_channels=[64, 128, 256, 512],  # adjust to match the backbone output!
                  in_index=[0, 1, 2, 3],
                  channels=256,
                  dropout_ratio=0.1,
-                 out_channels=19,  # number of classes, adjust!
+                 out_channels=19,  # adjust the number of classes!
                  norm_cfg=None,
                  align_corners=False,
                  interpolate_mode='bilinear'):
@@ -26,6 +26,11 @@ class SegformerHead(nn.Module):
         self.align_corners = align_corners
         self.interpolate_mode = interpolate_mode
 
+        print(f"in_channels: {self.in_channels}, type: {type(self.in_channels)}")
+        print(f"in_index: {self.in_index}, type: {type(self.in_index)}")
+        print(f"len(in_channels): {len(self.in_channels) if hasattr(self.in_channels, '__len__') else 'no len'}")
+        print(f"len(in_index): {len(self.in_index) if hasattr(self.in_index, '__len__') else 'no len'}")
+
         self.act_cfg = dict(type='ReLU')
         self.conv_seg = nn.Conv2d(channels, out_channels, kernel_size=1)
         self.dropout = nn.Dropout2d(dropout_ratio) if dropout_ratio > 0 else None
@@ -43,6 +48,7 @@
                     out_channels=channels,
                     kernel_size=1,
                     stride=1,
+                    bias=False,
                     norm_cfg=norm_cfg,
                     act_cfg=self.act_cfg))
 
@@ -50,6 +56,7 @@
             in_channels=channels * num_inputs,
             out_channels=channels,
             kernel_size=1,
+            bias=False,
             norm_cfg=norm_cfg)
 
     def cls_seg(self, feat):
@@ -81,9 +88,9 @@ class MySegformerForSemanticSegmentation(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
 
-        # Initialize the backbone with parameters from the config
+        # Important: pass the whole list, not just the first element
         self.backbone = MixVisionTransformer(
-            embed_dims=config.embed_dims,
+            embed_dims=config.embed_dims,  # WHOLE list, e.g. [64, 128, 320, 512]
             num_stages=config.num_stages,
             num_layers=config.num_layers,
             num_heads=config.num_heads,
@@ -98,17 +105,18 @@ class MySegformerForSemanticSegmentation(PreTrainedModel):
             out_indices=config.out_indices
         )
 
-        # Initialize the head; set out_channels from the config or hard-code it
+        # Make sure in_channels is a list
+        in_channels = config.embed_dims
+        if isinstance(in_channels, int):
+            in_channels = [in_channels]
+
+        print(f"config.embed_dims: {config.embed_dims}, type: {type(config.embed_dims)}")
         self.segmentation_head = SegformerHead(
-            in_channels=[64, 128, 256, 512],  # <- adjust depending on the backbone output!
+            in_channels=config.embed_dims,  # e.g. [64, 128, 320, 512]
+            in_index=list(config.out_indices),  # e.g. [0, 1, 2, 3]
             out_channels=config.num_classes if hasattr(config, 'num_classes') else 19,
             dropout_ratio=0.1,
             align_corners=False
         )
 
         self.post_init()
-
-    def forward(self, x):
-        features = self.backbone(x)
-        segmentation_output = self.segmentation_head(features)
-        return segmentation_output
preTrainedTest.py ADDED
@@ -0,0 +1,13 @@
+from modeling_my_segformer import MySegformerForSemanticSegmentation
+from mix_vision_transformer_config import MySegformerConfig
+
+# The path to your HF repo (can also simply be used as a string)
+model_name_or_path = "TimM77/SegformerPlusPlus"
+
+# Load the config (automatically from config.json in the repo)
+config = MySegformerConfig.from_pretrained(model_name_or_path)
+
+# Load the model (weights from pytorch_model.bin + config)
+model = MySegformerForSemanticSegmentation.from_pretrained(model_name_or_path, config=config)
+
+print(model, config)
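Note that this commit also removes the wrapper's explicit forward, so a plain model(x) would raise NotImplementedError; a smoke test can instead call the backbone and head directly. A sketch, assuming the head accepts the backbone's four feature maps and 19 output classes:

import torch

model.eval()
with torch.no_grad():
    features = model.backbone(torch.randn(1, 3, 512, 512))  # tuple of 4 feature maps
    logits = model.segmentation_head(features)
print(logits.shape)  # expected: (1, 19, 128, 128), i.e. stride-4 resolution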
segformer_plusplus/model/backbone/mit.py CHANGED
@@ -415,7 +415,7 @@ class MixVisionTransformer(BaseModule):
         cur = 0
         self.layers = ModuleList()
         for i, num_layer in enumerate(num_layers):
-            embed_dims_i = embed_dims * num_heads[i]
+            embed_dims_i = embed_dims[i]
             patch_embed = PatchEmbed(
                 in_channels=in_channels,
                 embed_dims=embed_dims_i,
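This one-line fix is what makes the 320-dim third stage expressible at all: the old formula could only produce head-count multiples of the base width. A worked comparison using the values from this repo's config:

num_heads = [1, 2, 4, 8]

old_dims = [64 * h for h in num_heads]  # old formula: [64, 128, 256, 512]
new_dims = [64, 128, 320, 512]          # per-stage list from config.json

print(old_dims)  # stage 3 would get 256 ...
print(new_dims)  # ... but the checkpoint expects 320, hence the indexed lookup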