Tim77777767
committed on
Commit
·
c2cafb7
1
Parent(s):
4346c95
Anpassungen der Configs für b5 backbone
Browse files
- config.json +3 -3
- mix_vision_transformer_config.py +10 -7
config.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"model_type": "my_segformer",
|
3 |
"in_channels": 3,
|
4 |
-
"embed_dims": 64,
|
5 |
"num_stages": 4,
|
6 |
-
"num_layers": [
|
7 |
"num_heads": [1, 2, 5, 8],
|
8 |
"patch_sizes": [7, 3, 3, 3],
|
9 |
"strides": [4, 2, 2, 2],
|
@@ -24,4 +24,4 @@
|
|
24 |
"align_corners": false,
|
25 |
"interpolate_mode": "bilinear"
|
26 |
}
|
27 |
-
}
|
|
|
1 |
{
|
2 |
"model_type": "my_segformer",
|
3 |
"in_channels": 3,
|
4 |
+
"embed_dims": [64, 128, 320, 512],
|
5 |
"num_stages": 4,
|
6 |
+
"num_layers": [3, 6, 40, 3],
|
7 |
"num_heads": [1, 2, 5, 8],
|
8 |
"patch_sizes": [7, 3, 3, 3],
|
9 |
"strides": [4, 2, 2, 2],
|
|
|
24 |
"align_corners": false,
|
25 |
"interpolate_mode": "bilinear"
|
26 |
}
|
27 |
+
}
|
mix_vision_transformer_config.py
CHANGED
@@ -6,9 +6,11 @@ class MySegformerConfig(PretrainedConfig):
|
|
6 |
def __init__(
|
7 |
self,
|
8 |
in_channels=3,
|
9 |
-
|
|
|
10 |
num_stages=4,
|
11 |
-
|
|
|
12 |
num_heads=[1, 2, 5, 8],
|
13 |
patch_sizes=[7, 3, 3, 3],
|
14 |
strides=[4, 2, 2, 2],
|
@@ -24,8 +26,8 @@ class MySegformerConfig(PretrainedConfig):
|
|
24 |
**kwargs
|
25 |
):
|
26 |
super().__init__(**kwargs)
|
27 |
-
self.embed_dims = embed_dims # einfach int behalten
|
28 |
self.in_channels = in_channels
|
|
|
29 |
self.num_stages = num_stages
|
30 |
self.num_layers = num_layers
|
31 |
self.num_heads = num_heads
|
@@ -40,11 +42,12 @@ class MySegformerConfig(PretrainedConfig):
|
|
40 |
self.out_indices = out_indices
|
41 |
self.num_classes = num_classes
|
42 |
|
43 |
-
#
|
44 |
if decode_head is None:
|
45 |
decode_head = {
|
46 |
-
|
47 |
-
"
|
|
|
48 |
"channels": 256,
|
49 |
"dropout_ratio": 0.1,
|
50 |
"out_channels": self.num_classes,
|
@@ -52,4 +55,4 @@ class MySegformerConfig(PretrainedConfig):
|
|
52 |
"interpolate_mode": "bilinear"
|
53 |
}
|
54 |
|
55 |
-
self.decode_head = decode_head
|
|
|
6 |
def __init__(
|
7 |
self,
|
8 |
in_channels=3,
|
9 |
+
# Corrected for SegFormer-B5: list of embedding dimensions for each stage
|
10 |
+
embed_dims=[64, 128, 320, 512],
|
11 |
num_stages=4,
|
12 |
+
# Corrected for SegFormer-B5: number of transformer layers in each stage
|
13 |
+
num_layers=[3, 6, 40, 3],
|
14 |
num_heads=[1, 2, 5, 8],
|
15 |
patch_sizes=[7, 3, 3, 3],
|
16 |
strides=[4, 2, 2, 2],
|
|
|
26 |
**kwargs
|
27 |
):
|
28 |
super().__init__(**kwargs)
|
|
|
29 |
self.in_channels = in_channels
|
30 |
+
self.embed_dims = embed_dims # This will now be a list, which is correct for SegFormer
|
31 |
self.num_stages = num_stages
|
32 |
self.num_layers = num_layers
|
33 |
self.num_heads = num_heads
|
|
|
42 |
self.out_indices = out_indices
|
43 |
self.num_classes = num_classes
|
44 |
|
45 |
+
# Optional block for Head-Config (if decode_head not passed)
|
46 |
if decode_head is None:
|
47 |
decode_head = {
|
48 |
+
# Corrected for SegFormer-B5: input channels for the decode head from each stage
|
49 |
+
"in_channels": [64, 128, 320, 512],
|
50 |
+
"in_index": list(range(self.num_stages)),
|
51 |
"channels": 256,
|
52 |
"dropout_ratio": 0.1,
|
53 |
"out_channels": self.num_classes,
|
|
|
55 |
"interpolate_mode": "bilinear"
|
56 |
}
|
57 |
|
58 |
+
self.decode_head = decode_head
|