Tim77777767 committed
Commit c2cafb7 · 1 Parent(s): 4346c95

Adjustments to the configs for the b5 backbone
Files changed (2)
  1. config.json +3 -3
  2. mix_vision_transformer_config.py +10 -7
config.json CHANGED
@@ -1,9 +1,9 @@
 {
   "model_type": "my_segformer",
   "in_channels": 3,
-  "embed_dims": 64,
+  "embed_dims": [64, 128, 320, 512],
   "num_stages": 4,
-  "num_layers": [2, 2, 2, 2],
+  "num_layers": [3, 6, 40, 3],
   "num_heads": [1, 2, 5, 8],
   "patch_sizes": [7, 3, 3, 3],
   "strides": [4, 2, 2, 2],
@@ -24,4 +24,4 @@
   "align_corners": false,
   "interpolate_mode": "bilinear"
   }
- }
+ }
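For reference, a minimal sanity-check sketch (not part of the repo; it assumes config.json is read from the working directory) confirming that every per-stage list in the updated B5 config has one entry per stage:

```python
import json

# Load the updated B5 backbone config (path is an assumption; adjust as needed).
with open("config.json") as f:
    cfg = json.load(f)

# Every per-stage list should have exactly num_stages entries.
for key in ("embed_dims", "num_layers", "num_heads", "patch_sizes", "strides"):
    assert len(cfg[key]) == cfg["num_stages"], f"{key} must have {cfg['num_stages']} entries"

print(cfg["embed_dims"])  # [64, 128, 320, 512]
print(cfg["num_layers"])  # [3, 6, 40, 3]
```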
mix_vision_transformer_config.py CHANGED
@@ -6,9 +6,11 @@ class MySegformerConfig(PretrainedConfig):
     def __init__(
         self,
         in_channels=3,
-        embed_dims=[32, 64, 160, 256],
+        # Corrected for SegFormer-B5: list of embedding dimensions for each stage
+        embed_dims=[64, 128, 320, 512],
         num_stages=4,
-        num_layers=[2, 2, 2, 2],
+        # Corrected for SegFormer-B5: number of transformer layers in each stage
+        num_layers=[3, 6, 40, 3],
         num_heads=[1, 2, 5, 8],
         patch_sizes=[7, 3, 3, 3],
         strides=[4, 2, 2, 2],
@@ -24,8 +26,8 @@ class MySegformerConfig(PretrainedConfig):
         **kwargs
     ):
         super().__init__(**kwargs)
-        self.embed_dims = embed_dims  # simply keep as an int
         self.in_channels = in_channels
+        self.embed_dims = embed_dims  # This will now be a list, which is correct for SegFormer
         self.num_stages = num_stages
         self.num_layers = num_layers
         self.num_heads = num_heads
@@ -40,11 +42,12 @@ class MySegformerConfig(PretrainedConfig):
         self.out_indices = out_indices
         self.num_classes = num_classes
 
-        # optional block for the head config (if decode_head was not passed)
+        # Optional block for the head config (if decode_head not passed)
         if decode_head is None:
             decode_head = {
-                "in_channels": [64, 128, 320, 512],  # [32, 64, 160, 256]
-                "in_index": list(range(self.num_stages)),  # [0, 1, 2, 3]
+                # Corrected for SegFormer-B5: input channels for the decode head from each stage
+                "in_channels": [64, 128, 320, 512],
+                "in_index": list(range(self.num_stages)),
                 "channels": 256,
                 "dropout_ratio": 0.1,
                 "out_channels": self.num_classes,
@@ -52,4 +55,4 @@ class MySegformerConfig(PretrainedConfig):
                 "interpolate_mode": "bilinear"
             }
 
-        self.decode_head = decode_head
+        self.decode_head = decode_head
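A minimal usage sketch, assuming mix_vision_transformer_config.py is importable, transformers (which provides PretrainedConfig) is installed, and the remaining constructor arguments keep their defaults:

```python
from mix_vision_transformer_config import MySegformerConfig

# The defaults now correspond to the SegFormer-B5 backbone.
config = MySegformerConfig()

print(config.embed_dims)  # [64, 128, 320, 512]
print(config.num_layers)  # [3, 6, 40, 3]

# The auto-generated decode head consumes the per-stage embedding dimensions.
assert config.decode_head["in_channels"] == config.embed_dims
assert config.decode_head["in_index"] == list(range(config.num_stages))
```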