Tim77777767 committed
Commit 85ebba9 · 1 Parent(s): e98bd8c

Re-added .bin with Git LFS, fixed tracking

Files changed (37)
  1. .gitattributes +3 -0
  2. .gitignore +2 -0
  3. build/lib/segformer_plusplus/__init__.py +0 -4
  4. build/lib/segformer_plusplus/build_model.py +0 -108
  5. build/lib/segformer_plusplus/configs/__init__.py +0 -1
  6. build/lib/segformer_plusplus/configs/segformer_mit_b0.py +0 -28
  7. build/lib/segformer_plusplus/configs/segformer_mit_b1.py +0 -8
  8. build/lib/segformer_plusplus/configs/segformer_mit_b2.py +0 -6
  9. build/lib/segformer_plusplus/configs/segformer_mit_b3.py +0 -6
  10. build/lib/segformer_plusplus/configs/segformer_mit_b4.py +0 -6
  11. build/lib/segformer_plusplus/configs/segformer_mit_b5.py +0 -6
  12. build/lib/segformer_plusplus/model/__init__.py +0 -1
  13. build/lib/segformer_plusplus/model/backbone/__init__.py +0 -3
  14. build/lib/segformer_plusplus/model/backbone/mit.py +0 -479
  15. build/lib/segformer_plusplus/model/head/__init__.py +0 -3
  16. build/lib/segformer_plusplus/model/head/segformer_head.py +0 -95
  17. build/lib/segformer_plusplus/random_benchmark.py +0 -61
  18. build/lib/segformer_plusplus/utils/__init__.py +0 -12
  19. build/lib/segformer_plusplus/utils/benchmark.py +0 -76
  20. build/lib/segformer_plusplus/utils/embed.py +0 -330
  21. build/lib/segformer_plusplus/utils/imagenet_weights.py +0 -8
  22. build/lib/segformer_plusplus/utils/registry.py +0 -6
  23. build/lib/segformer_plusplus/utils/shape_convert.py +0 -107
  24. build/lib/segformer_plusplus/utils/tome_presets.py +0 -20
  25. build/lib/segformer_plusplus/utils/wrappers.py +0 -51
  26. segformer_plusplus.egg-info/SOURCES.txt +10 -0
  27. segformer_plusplus.egg-info/requires.txt +3 -0
  28. segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png +3 -0
  29. segformer_plusplus/cityscape_benchmark.py +3 -11
  30. cityscapes_prediction_output_reference.txt → segformer_plusplus/cityscapes_prediction_output.txt +0 -0
  31. segformer_plusplus/cityscapes_prediction_output_reference_b05_nocheckpoint.txt +0 -0
  32. segformer_plusplus/config.json +10 -0
  33. segformer_plusplus/configs/config/utils.py +129 -2
  34. segformer_plusplus/modeling_segformer_plusplus.py +69 -0
  35. segformer_plusplus/pytorch_model.bin +3 -0
  36. segformer_plusplus/start_cityscape_benchmark.py +6 -4
  37. setup.py +12 -1
.gitattributes ADDED
@@ -0,0 +1,3 @@
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.png filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -10,3 +10,5 @@ __pycache__/
 .vscode/
 .idea/
 .DS_Store
+ build/
+ venv/

build/lib/segformer_plusplus/__init__.py DELETED
@@ -1,4 +0,0 @@
- from .build_model import create_model, create_custom_model
- from .random_benchmark import random_benchmark
-
- __all__ = ['create_model', 'create_custom_model', 'random_benchmark']

build/lib/segformer_plusplus/build_model.py DELETED
@@ -1,108 +0,0 @@
- import os
-
- from mmengine import registry
- from mmengine.config import Config
- from mmengine.model import BaseModule
-
- from .utils import MODELS, imagenet_weights
- from .utils import tome_presets
-
-
- class SegFormer(BaseModule):
-     """
-     This class represents a SegFormer model that allows for the application of token merging.
-
-     Attributes:
-         backbone (BaseModule): MixVisionTransformer backbone
-         decode_head (BaseModule): SegFormer head
-
-     """
-     def __init__(self, cfg):
-         """
-         Initialize the SegFormer model.
-
-         Args:
-             cfg (Config): an mmengine Config object, which defines the backbone, head and token merging strategy used.
-
-         """
-         super().__init__()
-         self.backbone = registry.build_model_from_cfg(cfg.backbone, registry=MODELS)
-         self.decode_head = registry.build_model_from_cfg(cfg.decode_head, registry=MODELS)
-
-     def forward(self, x):
-         """
-         Forward pass of the model.
-
-         Args:
-             x (torch.Tensor): input tensor of shape [B, C, H, W]
-
-         Returns:
-             torch.Tensor: output tensor
-
-         """
-         x = self.backbone(x)
-         x = self.decode_head(x)
-         return x
-
-
- def create_model(
-         backbone: str = 'b0',
-         tome_strategy: str = None,
-         out_channels: int = 19,
-         pretrained: bool = False,
- ):
-     """
-     Create a SegFormer model using the predefined SegFormer backbones from the MiT series (b0-b5).
-
-     Args:
-         backbone (str): backbone name (e.g. 'b0')
-         tome_strategy (str | list(dict)): select a preset strategy ('bsm_hq', 'bsm_fast', 'n2d_2x2') or define a
-             custom strategy as a list of dictionaries, one per stage, each specifying that stage's merging strategy
-         out_channels (int): number of output channels (e.g. 19 for the cityscapes semantic segmentation task)
-         pretrained (bool): use pretrained (imagenet) weights
-
-     Returns:
-         BaseModule: SegFormer model
-
-     """
-     backbone = backbone.lower()
-     assert backbone in [f'b{i}' for i in range(6)]
-
-     wd = os.path.dirname(os.path.abspath(__file__))
-
-     cfg = Config.fromfile(os.path.join(wd, 'configs', f'segformer_mit_{backbone}.py'))
-
-     cfg.decode_head.out_channels = out_channels
-
-     if tome_strategy is not None:
-         if tome_strategy not in list(tome_presets.keys()):
-             print("Using custom merging strategy.")
-             cfg.backbone.tome_cfg = tome_strategy
-         else:
-             cfg.backbone.tome_cfg = tome_presets[tome_strategy]
-
-     # load imagenet weights
-     if pretrained:
-         cfg.backbone.init_cfg = dict(type='Pretrained', checkpoint=imagenet_weights[backbone])
-
-     return SegFormer(cfg)
-
-
- def create_custom_model(
-         model_cfg: Config,
-         tome_strategy: list[dict] = None,
- ):
-     """
-     Create a SegFormer model with customizable backbone and head.
-
-     Args:
-         model_cfg (Config): an mmengine Config object defining the custom backbone and head
-         tome_strategy (list(dict)): custom token merging strategy
-
-     Returns:
-         BaseModule: SegFormer model
-
-     """
-     if tome_strategy is not None:
-         model_cfg.backbone.tome_cfg = tome_strategy
-
-     return SegFormer(model_cfg)

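Only the stale `build/` copy is deleted here; the same builders remain in `segformer_plusplus/build_model.py` (see the SOURCES.txt diff below). A minimal usage sketch, assuming mmengine/mmcv are installed; the input resolution is an arbitrary choice:

import torch
from segformer_plusplus import create_model

# MiT-b0 backbone, 'bsm_hq' token-merging preset, 19 Cityscapes classes
model = create_model(backbone='b0', tome_strategy='bsm_hq', out_channels=19)
logits = model(torch.rand(1, 3, 1024, 1024))  # one channel per class, at 1/4 input resolution
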
build/lib/segformer_plusplus/configs/__init__.py DELETED
@@ -1 +0,0 @@
- __all__ = []

build/lib/segformer_plusplus/configs/segformer_mit_b0.py DELETED
@@ -1,28 +0,0 @@
- norm_cfg = dict(type='SyncBN', requires_grad=True)
- backbone = dict(
-     type='MixVisionTransformer',
-     in_channels=3,
-     embed_dims=32,
-     num_stages=4,
-     num_layers=[2, 2, 2, 2],
-     num_heads=[1, 2, 5, 8],
-     patch_sizes=[7, 3, 3, 3],
-     sr_ratios=[8, 4, 2, 1],
-     out_indices=(0, 1, 2, 3),
-     mlp_ratio=4,
-     qkv_bias=True,
-     drop_rate=0.0,
-     attn_drop_rate=0.0,
-     drop_path_rate=0.1
- )
- decode_head = dict(
-     type='SegformerHead',
-     in_channels=[32, 64, 160, 256],
-     in_index=[0, 1, 2, 3],
-     channels=256,
-     dropout_ratio=0.1,
-     out_channels=19,
-     norm_cfg=norm_cfg,
-     align_corners=False,
-     interpolate_mode='bilinear'
- )

build/lib/segformer_plusplus/configs/segformer_mit_b1.py DELETED
@@ -1,8 +0,0 @@
- _base_ = ['./segformer_mit_b0.py']
-
- backbone = dict(
-     embed_dims=64,
- )
- decode_head = dict(
-     in_channels=[64, 128, 320, 512]
- )

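The `_base_` entry is mmengine's config inheritance: loading b1 first loads b0 and then overrides matching keys, and b2-b5 in turn inherit from b1. A quick sketch of how resolution behaves (assuming mmengine is installed and the path is relative to the package):

from mmengine.config import Config

cfg = Config.fromfile('segformer_plusplus/configs/segformer_mit_b1.py')
print(cfg.backbone.embed_dims)  # 64, overridden here
print(cfg.backbone.num_layers)  # [2, 2, 2, 2], inherited from segformer_mit_b0.py
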
build/lib/segformer_plusplus/configs/segformer_mit_b2.py DELETED
@@ -1,6 +0,0 @@
- _base_ = ['./segformer_mit_b1.py']
-
- backbone = dict(
-     embed_dims=64,
-     num_layers=[3, 4, 6, 3]
- )

build/lib/segformer_plusplus/configs/segformer_mit_b3.py DELETED
@@ -1,6 +0,0 @@
- _base_ = ['./segformer_mit_b1.py']
-
- backbone = dict(
-     embed_dims=64,
-     num_layers=[3, 4, 18, 3]
- )

build/lib/segformer_plusplus/configs/segformer_mit_b4.py DELETED
@@ -1,6 +0,0 @@
- _base_ = ['./segformer_mit_b1.py']
-
- backbone = dict(
-     embed_dims=64,
-     num_layers=[3, 8, 27, 3]
- )

build/lib/segformer_plusplus/configs/segformer_mit_b5.py DELETED
@@ -1,6 +0,0 @@
- _base_ = ['./segformer_mit_b1.py']
-
- backbone = dict(
-     embed_dims=64,
-     num_layers=[3, 6, 40, 3]
- )

build/lib/segformer_plusplus/model/__init__.py DELETED
@@ -1 +0,0 @@
- __all__ = []

build/lib/segformer_plusplus/model/backbone/__init__.py DELETED
@@ -1,3 +0,0 @@
- from .mit import MixVisionTransformer
-
- __all__ = ['MixVisionTransformer']

build/lib/segformer_plusplus/model/backbone/mit.py DELETED
@@ -1,479 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- import math
-
- import torch
- import torch.nn as nn
- import torch.utils.checkpoint as cp
- from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer
- from mmcv.cnn.bricks.drop import build_dropout
- from mmcv.cnn.bricks.transformer import MultiheadAttention
- from mmengine.model import BaseModule, ModuleList, Sequential
- from mmengine.model.weight_init import (constant_init, normal_init,
-                                         trunc_normal_init)
- from tomesd.merge import bipartite_soft_matching_random2d
-
- from ...utils import PatchEmbed
- from ...utils import nchw_to_nlc, nlc_to_nchw
- from ...utils import MODELS
-
-
- class MixFFN(BaseModule):
-     """An implementation of MixFFN of Segformer.
-
-     The differences between MixFFN & FFN:
-         1. Use 1X1 Conv to replace Linear layer.
-         2. Introduce 3X3 Conv to encode positional information.
-     Args:
-         embed_dims (int): The feature dimension. Same as
-             `MultiheadAttention`. Defaults: 256.
-         feedforward_channels (int): The hidden dimension of FFNs.
-             Defaults: 1024.
-         act_cfg (dict, optional): The activation config for FFNs.
-             Default: dict(type='GELU')
-         ffn_drop (float, optional): Probability of an element to be
-             zeroed in FFN. Default 0.0.
-         dropout_layer (obj:`ConfigDict`): The dropout_layer used
-             when adding the shortcut.
-         init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
-             Default: None.
-     """
-
-     def __init__(self,
-                  embed_dims,
-                  feedforward_channels,
-                  act_cfg=dict(type='GELU'),
-                  ffn_drop=0.,
-                  dropout_layer=None,
-                  init_cfg=None):
-         super().__init__(init_cfg)
-
-         self.embed_dims = embed_dims
-         self.feedforward_channels = feedforward_channels
-         self.act_cfg = act_cfg
-         self.activate = build_activation_layer(act_cfg)
-
-         in_channels = embed_dims
-         fc1 = Conv2d(
-             in_channels=in_channels,
-             out_channels=feedforward_channels,
-             kernel_size=1,
-             stride=1,
-             bias=True)
-         # 3x3 depth wise conv to provide positional encode information
-         pe_conv = Conv2d(
-             in_channels=feedforward_channels,
-             out_channels=feedforward_channels,
-             kernel_size=3,
-             stride=1,
-             padding=(3 - 1) // 2,
-             bias=True,
-             groups=feedforward_channels)
-         fc2 = Conv2d(
-             in_channels=feedforward_channels,
-             out_channels=in_channels,
-             kernel_size=1,
-             stride=1,
-             bias=True)
-         drop = nn.Dropout(ffn_drop)
-         layers = [fc1, pe_conv, self.activate, drop, fc2, drop]
-         self.layers = Sequential(*layers)
-         self.dropout_layer = build_dropout(
-             dropout_layer) if dropout_layer else torch.nn.Identity()
-
-     def forward(self, x, hw_shape, identity=None):
-         out = nlc_to_nchw(x, hw_shape)
-         out = self.layers(out)
-         out = nchw_to_nlc(out)
-         if identity is None:
-             identity = x
-         return identity + self.dropout_layer(out)
-
-
- class EfficientMultiheadAttention(MultiheadAttention):
-     """An implementation of Efficient Multi-head Attention of Segformer.
-
-     This module is modified from MultiheadAttention which is a module from
-     mmcv.cnn.bricks.transformer.
-     Args:
-         embed_dims (int): The embedding dimension.
-         num_heads (int): Parallel attention heads.
-         attn_drop (float): A Dropout layer on attn_output_weights.
-             Default: 0.0.
-         proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.
-             Default: 0.0.
-         dropout_layer (obj:`ConfigDict`): The dropout_layer used
-             when adding the shortcut. Default: None.
-         init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.
-             Default: None.
-         batch_first (bool): Key, Query and Value are shape of
-             (batch, n, embed_dim) or (n, batch, embed_dim). Default: True.
-         qkv_bias (bool): enable bias for qkv if True. Default: False.
-         norm_cfg (dict): Config dict for normalization layer.
-             Default: dict(type='LN').
-         sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head
-             Attention of Segformer. Default: 1.
-     """
-
-     def __init__(self,
-                  embed_dims,
-                  num_heads,
-                  attn_drop=0.,
-                  proj_drop=0.,
-                  dropout_layer=None,
-                  init_cfg=None,
-                  batch_first=True,
-                  qkv_bias=False,
-                  tome_cfg=dict(),
-                  norm_cfg=dict(type='LN'),
-                  sr_ratio=1):
-         super().__init__(
-             embed_dims,
-             num_heads,
-             attn_drop,
-             proj_drop,
-             dropout_layer=dropout_layer,
-             init_cfg=init_cfg,
-             batch_first=batch_first,
-             bias=qkv_bias)
-
-         self.q_mode = tome_cfg.get('q_mode')
-         self.kv_mode = tome_cfg.get('kv_mode')
-         self.tome_cfg = tome_cfg
-
-         self.sr_ratio = sr_ratio
-         if sr_ratio > 1:
-             self.sr = Conv2d(
-                 in_channels=embed_dims,
-                 out_channels=embed_dims,
-                 kernel_size=sr_ratio,
-                 stride=sr_ratio)
-             # The ret[0] of build_norm_layer is norm name.
-             self.norm = build_norm_layer(norm_cfg, embed_dims)[1]
-
-     def forward(self, x, hw_shape, identity=None):
-         x_q = x
-
-         if self.sr_ratio > 1:
-             x_kv = nlc_to_nchw(x, hw_shape)
-             x_kv = self.sr(x_kv)
-             x_kv = nchw_to_nlc(x_kv)
-             x_kv = self.norm(x_kv)
-         else:
-             x_kv = x
-
-         # 2D Neighbour Merging KV
-         if self.kv_mode == 'n2d':
-             kv_hw_shape = (int(hw_shape[0] / self.sr_ratio), int(hw_shape[1] / self.sr_ratio))
-             x_kv = nlc_to_nchw(x_kv, kv_hw_shape)
-             x_kv = torch.nn.functional.avg_pool2d(x_kv, kernel_size=self.tome_cfg['kv_s'],
-                                                   stride=self.tome_cfg['kv_s'],
-                                                   ceil_mode=True)
-             x_kv = nchw_to_nlc(x_kv)
-
-         # Bipartite Soft Matching (tomesd) KV
-         if self.kv_mode == 'bsm':
-             w_kv = int(hw_shape[1] / self.sr_ratio)
-             h_kv = int(hw_shape[0] / self.sr_ratio)
-             merge, unmerge = bipartite_soft_matching_random2d(metric=x_kv, w=w_kv, h=h_kv,
-                                                               r=int(x_kv.size()[1] * self.tome_cfg['kv_r']),
-                                                               sx=self.tome_cfg['kv_sx'], sy=self.tome_cfg['kv_sy'],
-                                                               no_rand=True)
-             x_kv = merge(x_kv)
-
-         if identity is None:
-             identity = x_q
-
-         # 1D Neighbor Merging Q
-         if self.q_mode == 'n1d':
-             x_q = x_q.transpose(-2, -1)
-             x_q = torch.nn.functional.avg_pool1d(x_q, kernel_size=self.tome_cfg['q_s'],
-                                                  stride=self.tome_cfg['q_s'],
-                                                  ceil_mode=True)
-             x_q = x_q.transpose(-2, -1)
-
-         # 2D Neighbor Merging Q
-         if self.q_mode == 'n2d':
-             reduced_hw = (int(torch.ceil(torch.tensor(hw_shape[0] / self.tome_cfg['q_s'][0]))),
-                           int(torch.ceil(torch.tensor(hw_shape[1] / self.tome_cfg['q_s'][1]))))
-             x_q = nlc_to_nchw(x_q, hw_shape)
-             x_q = torch.nn.functional.avg_pool2d(x_q, kernel_size=self.tome_cfg['q_s'],
-                                                  stride=self.tome_cfg['q_s'],
-                                                  ceil_mode=True)
-             x_q = nchw_to_nlc(x_q)
-
-         # Bipartite Soft Matching (tomesd) Q
-         if self.q_mode == 'bsm':
-             merge, unmerge = bipartite_soft_matching_random2d(metric=x_q, w=hw_shape[1], h=hw_shape[0],
-                                                               r=int(x_q.size()[1] * self.tome_cfg['q_r']),
-                                                               sx=self.tome_cfg['q_sx'], sy=self.tome_cfg['q_sy'],
-                                                               no_rand=True)
-             x_q = merge(x_q)
-
-         # Because the dataflow('key', 'query', 'value') of
-         # ``torch.nn.MultiheadAttention`` is (num_query, batch,
-         # embed_dims), we should adjust the shape of dataflow from
-         # batch_first (batch, num_query, embed_dims) to num_query_first
-         # (num_query, batch, embed_dims), and recover ``attn_output``
-         # from num_query_first to batch_first.
-
-         if self.batch_first:
-             x_q = x_q.transpose(0, 1)
-             x_kv = x_kv.transpose(0, 1)
-         out = self.attn(query=x_q, key=x_kv, value=x_kv)[0]
-         if self.batch_first:
-             out = out.transpose(0, 1)
-
-         # Unmerging BSM (tome+tomesd)
-         if self.q_mode == 'bsm':
-             out = unmerge(out)
-
-         # Unmerging 1D Neighbour Merging
-         if self.q_mode == 'n1d':
-             out = out.transpose(-2, -1)
-             out = torch.nn.functional.interpolate(out, size=identity.size()[-2])
-             out = out.transpose(-2, -1)
-
-         # Unmerging 2D Neighbor Merging
-         if self.q_mode == 'n2d':
-             out = nlc_to_nchw(out, reduced_hw)
-             out = torch.nn.functional.interpolate(out, size=hw_shape)
-             out = nchw_to_nlc(out)
-
-         return identity + self.dropout_layer(self.proj_drop(out))
-
-
- class TransformerEncoderLayer(BaseModule):
-     """Implements one encoder layer in Segformer.
-
-     Args:
-         embed_dims (int): The feature dimension.
-         num_heads (int): Parallel attention heads.
-         feedforward_channels (int): The hidden dimension for FFNs.
-         drop_rate (float): Probability of an element to be zeroed
-             after the feed forward layer. Default 0.0.
-         attn_drop_rate (float): The drop out rate for attention layer.
-             Default 0.0.
-         drop_path_rate (float): stochastic depth rate. Default 0.0.
-         qkv_bias (bool): enable bias for qkv if True.
-             Default: True.
-         act_cfg (dict): The activation config for FFNs.
-             Default: dict(type='GELU').
-         norm_cfg (dict): Config dict for normalization layer.
-             Default: dict(type='LN').
-         batch_first (bool): Key, Query and Value are shape of
-             (batch, n, embed_dim) or (n, batch, embed_dim). Default: True.
-         init_cfg (dict, optional): Initialization config dict.
-             Default: None.
-         sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head
-             Attention of Segformer. Default: 1.
-         with_cp (bool): Use checkpoint or not. Using checkpoint will save
-             some memory while slowing down the training speed. Default: False.
-     """
-
-     def __init__(self,
-                  embed_dims,
-                  num_heads,
-                  feedforward_channels,
-                  drop_rate=0.,
-                  attn_drop_rate=0.,
-                  drop_path_rate=0.,
-                  qkv_bias=True,
-                  tome_cfg=dict(),
-                  act_cfg=dict(type='GELU'),
-                  norm_cfg=dict(type='LN'),
-                  batch_first=True,
-                  sr_ratio=1,
-                  with_cp=False):
-         super().__init__()
-
-         # The ret[0] of build_norm_layer is norm name.
-         self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]
-
-         self.attn = EfficientMultiheadAttention(
-             embed_dims=embed_dims,
-             num_heads=num_heads,
-             attn_drop=attn_drop_rate,
-             proj_drop=drop_rate,
-             dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),
-             batch_first=batch_first,
-             qkv_bias=qkv_bias,
-             tome_cfg=tome_cfg,
-             norm_cfg=norm_cfg,
-             sr_ratio=sr_ratio)
-
-         # The ret[0] of build_norm_layer is norm name.
-         self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]
-
-         self.ffn = MixFFN(
-             embed_dims=embed_dims,
-             feedforward_channels=feedforward_channels,
-             ffn_drop=drop_rate,
-             dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),
-             act_cfg=act_cfg)
-
-         self.with_cp = with_cp
-
-     def forward(self, x, hw_shape):
-
-         def _inner_forward(x):
-             x = self.attn(self.norm1(x), hw_shape, identity=x)
-             x = self.ffn(self.norm2(x), hw_shape, identity=x)
-             return x
-
-         if self.with_cp and x.requires_grad:
-             x = cp.checkpoint(_inner_forward, x)
-         else:
-             x = _inner_forward(x)
-         return x
-
-
- @MODELS.register_module()
- class MixVisionTransformer(BaseModule):
-     """The backbone of Segformer.
-
-     This backbone is the implementation of `SegFormer: Simple and
-     Efficient Design for Semantic Segmentation with
-     Transformers <https://arxiv.org/abs/2105.15203>`_.
-     Args:
-         in_channels (int): Number of input channels. Default: 3.
-         embed_dims (int): Embedding dimension. Default: 768.
-         num_stages (int): The num of stages. Default: 4.
-         num_layers (Sequence[int]): The layer number of each transformer encode
-             layer. Default: [3, 4, 6, 3].
-         num_heads (Sequence[int]): The attention heads of each transformer
-             encode layer. Default: [1, 2, 4, 8].
-         patch_sizes (Sequence[int]): The patch_size of each overlapped patch
-             embedding. Default: [7, 3, 3, 3].
-         strides (Sequence[int]): The stride of each overlapped patch embedding.
-             Default: [4, 2, 2, 2].
-         sr_ratios (Sequence[int]): The spatial reduction rate of each
-             transformer encode layer. Default: [8, 4, 2, 1].
-         out_indices (Sequence[int] | int): Output from which stages.
-             Default: (0, 1, 2, 3).
-         mlp_ratio (int): ratio of mlp hidden dim to embedding dim.
-             Default: 4.
-         qkv_bias (bool): Enable bias for qkv if True. Default: True.
-         drop_rate (float): Probability of an element to be zeroed.
-             Default 0.0
-         attn_drop_rate (float): The drop out rate for attention layer.
-             Default 0.0
-         drop_path_rate (float): stochastic depth rate. Default 0.0
-         norm_cfg (dict): Config dict for normalization layer.
-             Default: dict(type='LN')
-         act_cfg (dict): The activation config for FFNs.
-             Default: dict(type='GELU').
-         pretrained (str, optional): model pretrained path. Default: None.
-         init_cfg (dict or list[dict], optional): Initialization config dict.
-             Default: None.
-         with_cp (bool): Use checkpoint or not. Using checkpoint will save
-             some memory while slowing down the training speed. Default: False.
-     """
-
-     def __init__(self,
-                  in_channels=3,
-                  embed_dims=64,
-                  num_stages=4,
-                  num_layers=[3, 4, 6, 3],
-                  num_heads=[1, 2, 4, 8],
-                  patch_sizes=[7, 3, 3, 3],
-                  strides=[4, 2, 2, 2],
-                  sr_ratios=[8, 4, 2, 1],
-                  out_indices=(0, 1, 2, 3),
-                  mlp_ratio=4,
-                  qkv_bias=True,
-                  drop_rate=0.,
-                  attn_drop_rate=0.,
-                  drop_path_rate=0.,
-                  tome_cfg=[dict(), dict(), dict(), dict()],
-                  act_cfg=dict(type='GELU'),
-                  norm_cfg=dict(type='LN', eps=1e-6),
-                  init_cfg=None,
-                  with_cp=False,
-                  down_sample=False):
-         super().__init__(init_cfg=init_cfg)
-
-         self.embed_dims = embed_dims
-         self.num_stages = num_stages
-         self.num_layers = num_layers
-         self.num_heads = num_heads
-         self.patch_sizes = patch_sizes
-         self.strides = strides
-         self.sr_ratios = sr_ratios
-         self.with_cp = with_cp
-         self.down_sample = down_sample
-         assert num_stages == len(num_layers) == len(num_heads) \
-                == len(patch_sizes) == len(strides) == len(sr_ratios)
-
-         self.out_indices = out_indices
-         assert max(out_indices) < self.num_stages
-
-         # transformer encoder
-         dpr = [
-             x.item()
-             for x in torch.linspace(0, drop_path_rate, sum(num_layers))
-         ]  # stochastic num_layer decay rule
-
-         cur = 0
-         self.layers = ModuleList()
-         for i, num_layer in enumerate(num_layers):
-             embed_dims_i = embed_dims * num_heads[i]
-             patch_embed = PatchEmbed(
-                 in_channels=in_channels,
-                 embed_dims=embed_dims_i,
-                 kernel_size=patch_sizes[i],
-                 stride=strides[i],
-                 padding=patch_sizes[i] // 2,
-                 norm_cfg=norm_cfg)
-             layer = ModuleList([
-                 TransformerEncoderLayer(
-                     embed_dims=embed_dims_i,
-                     num_heads=num_heads[i],
-                     feedforward_channels=mlp_ratio * embed_dims_i,
-                     drop_rate=drop_rate,
-                     attn_drop_rate=attn_drop_rate,
-                     drop_path_rate=dpr[cur + idx],
-                     qkv_bias=qkv_bias,
-                     tome_cfg=tome_cfg[i],
-                     act_cfg=act_cfg,
-                     norm_cfg=norm_cfg,
-                     with_cp=with_cp,
-                     sr_ratio=sr_ratios[i]) for idx in range(num_layer)
-             ])
-             in_channels = embed_dims_i
-             # The ret[0] of build_norm_layer is norm name.
-             norm = build_norm_layer(norm_cfg, embed_dims_i)[1]
-             self.layers.append(ModuleList([patch_embed, layer, norm]))
-             cur += num_layer
-
-     def init_weights(self):
-         if self.init_cfg is None:
-             for m in self.modules():
-                 if isinstance(m, nn.Linear):
-                     trunc_normal_init(m, std=.02, bias=0.)
-                 elif isinstance(m, nn.LayerNorm):
-                     constant_init(m, val=1.0, bias=0.)
-                 elif isinstance(m, nn.Conv2d):
-                     fan_out = m.kernel_size[0] * m.kernel_size[
-                         1] * m.out_channels
-                     fan_out //= m.groups
-                     normal_init(
-                         m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0)
-         else:
-             super().init_weights()
-
-     def forward(self, x):
-         if self.down_sample:
-             x = torch.nn.functional.interpolate(x, scale_factor=(0.5, 0.5))
-         outs = []
-
-         for i, layer in enumerate(self.layers):
-             x, hw_shape = layer[0](x)
-             for block in layer[1]:
-                 x = block(x, hw_shape)
-             x = layer[2](x)
-             x = nlc_to_nchw(x, hw_shape)
-             if i in self.out_indices:
-                 outs.append(x)
-
-         return outs

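The per-stage `tome_cfg` dict read by `EfficientMultiheadAttention` above selects `q_mode`/`kv_mode` ('n1d', 'n2d', 'bsm', or None) plus mode-specific keys: `q_s`/`kv_s` for neighbor merging, and `q_r`, `q_sx`, `q_sy` (or the `kv_` equivalents) for bipartite soft matching. A hand-written four-stage strategy could look like this sketch; the values are illustrative, not tuned:

custom_strategy = [
    dict(q_mode=None, kv_mode='n2d', kv_s=(2, 2)),              # stage 1: 2x2 average-pool the KV tokens
    dict(q_mode=None, kv_mode='n2d', kv_s=(2, 2)),              # stage 2: same
    dict(q_mode='n1d', kv_mode=None, q_s=2),                    # stage 3: merge neighboring Q tokens in 1D
    dict(q_mode='bsm', kv_mode=None, q_r=0.5, q_sx=2, q_sy=2),  # stage 4: soft-match away 50% of Q tokens
]
# passed as tome_strategy to create_model() / create_custom_model()
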
build/lib/segformer_plusplus/model/head/__init__.py DELETED
@@ -1,3 +0,0 @@
- from .segformer_head import SegformerHead
-
- __all__ = ['SegformerHead']

build/lib/segformer_plusplus/model/head/segformer_head.py DELETED
@@ -1,95 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- import torch
- import torch.nn as nn
- from mmcv.cnn import ConvModule
- from mmengine.model import BaseModule
-
- from ...utils import MODELS
- from ...utils import resize
-
-
- @MODELS.register_module()
- class SegformerHead(BaseModule):
-     """The all mlp Head of segformer.
-
-     This head is the implementation of
-     `Segformer <https://arxiv.org/abs/2105.15203>`_.
-
-     Args:
-         interpolate_mode: The interpolate mode of MLP head upsample operation.
-             Default: 'bilinear'.
-     """
-
-     def __init__(self,
-                  in_channels=[32, 64, 160, 256],
-                  in_index=[0, 1, 2, 3],
-                  channels=256,
-                  dropout_ratio=0.1,
-                  out_channels=19,
-                  norm_cfg=None,
-                  align_corners=False,
-                  interpolate_mode='bilinear'):
-         super().__init__()
-
-         self.in_channels = in_channels
-         self.in_index = in_index
-         self.channels = channels
-         self.dropout_ratio = dropout_ratio
-         self.out_channels = out_channels
-         self.norm_cfg = norm_cfg
-         self.align_corners = align_corners
-         self.interpolate_mode = interpolate_mode
-
-         self.act_cfg = dict(type='ReLU')
-         self.conv_seg = nn.Conv2d(channels, self.out_channels, kernel_size=1)
-         if dropout_ratio > 0:
-             self.dropout = nn.Dropout2d(dropout_ratio)
-         else:
-             self.dropout = None
-
-         num_inputs = len(self.in_channels)
-
-         assert num_inputs == len(self.in_index)
-
-         self.convs = nn.ModuleList()
-         for i in range(num_inputs):
-             self.convs.append(
-                 ConvModule(
-                     in_channels=self.in_channels[i],
-                     out_channels=self.channels,
-                     kernel_size=1,
-                     stride=1,
-                     norm_cfg=self.norm_cfg,
-                     act_cfg=self.act_cfg))
-
-         self.fusion_conv = ConvModule(
-             in_channels=self.channels * num_inputs,
-             out_channels=self.channels,
-             kernel_size=1,
-             norm_cfg=self.norm_cfg)
-
-     def cls_seg(self, feat):
-         """Classify each pixel."""
-         if self.dropout is not None:
-             feat = self.dropout(feat)
-         output = self.conv_seg(feat)
-         return output
-
-     def forward(self, inputs):
-         # Receive 4 stage backbone feature map: 1/4, 1/8, 1/16, 1/32
-         outs = []
-         for idx in range(len(inputs)):
-             x = inputs[idx]
-             conv = self.convs[idx]
-             outs.append(
-                 resize(
-                     input=conv(x),
-                     size=inputs[0].shape[2:],
-                     mode=self.interpolate_mode,
-                     align_corners=self.align_corners))
-
-         out = self.fusion_conv(torch.cat(outs, dim=1))
-
-         out = self.cls_seg(out)
-
-         return out

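Shape-wise, the head projects each backbone map to `channels`, resizes everything to the stride-4 map, concatenates, fuses, and classifies. A self-contained check (assuming mmcv is installed; the import path follows the package layout above, and the feature sizes correspond to a 1024x1024 input):

import torch
from segformer_plusplus.model.head import SegformerHead

head = SegformerHead(in_channels=[32, 64, 160, 256], channels=256, out_channels=19)
feats = [torch.rand(1, c, 256 // 2 ** i, 256 // 2 ** i)
         for i, c in enumerate([32, 64, 160, 256])]   # strides 4, 8, 16, 32
print(head(feats).shape)  # torch.Size([1, 19, 256, 256])
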
build/lib/segformer_plusplus/random_benchmark.py DELETED
@@ -1,61 +0,0 @@
- from typing import Union, List, Tuple
-
- import numpy as np
- import torch
-
- from .utils import benchmark
-
- device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
-
-
- def random_benchmark(
-         model: torch.nn.Module,
-         batch_size: Union[int, List[int]] = 1,
-         image_size: Union[Tuple[int], List[Tuple[int]]] = (3, 1024, 1024),
- ):
-     """
-     Calculate the FPS of a given model using randomly generated tensors.
-
-     Args:
-         model: instance of a model (e.g. SegFormer)
-         batch_size: the batch size(s) at which to calculate the FPS (e.g. 1 or [1, 2, 4])
-         image_size: the size of the images to use (e.g. (3, 1024, 1024))
-
-     Returns: the FPS values calculated for all image sizes and batch sizes in the form of a dictionary
-
-     """
-     if isinstance(batch_size, int):
-         batch_size = [batch_size]
-     if isinstance(image_size, tuple):
-         image_size = [image_size]
-
-     values = {}
-     throughput_values = []
-
-     for i in image_size:
-         # fill with fps for each batch size
-         fps = []
-         for b in batch_size:
-             for _ in range(4):
-                 # Baseline benchmark
-                 if i[1] >= 1024:
-                     r = 16
-                 else:
-                     r = 32
-                 baseline_throughput = benchmark(
-                     model.to(device),
-                     device=device,
-                     verbose=True,
-                     runs=r,
-                     batch_size=b,
-                     input_size=i
-                 )
-                 throughput_values.append(baseline_throughput)
-             throughput_values = np.asarray(throughput_values)
-             throughput = np.around(np.mean(throughput_values), decimals=2)
-             print('Im_size:', i, 'Batch_size:', b, 'Mean:', throughput, 'Std:',
-                   np.around(np.std(throughput_values), decimals=2))
-             throughput_values = []
-             fps.append({b: throughput})
-         values[i] = fps
-     return values

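A short usage sketch for this API (the same function remains in the package; the reported numbers below are illustrative only):

from segformer_plusplus import create_model, random_benchmark

model = create_model('b0')
values = random_benchmark(model, batch_size=[1, 2], image_size=(3, 1024, 1024))
# e.g. {(3, 1024, 1024): [{1: 41.3}, {2: 43.9}]} -- FPS keyed by image size, then batch size
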
build/lib/segformer_plusplus/utils/__init__.py DELETED
@@ -1,12 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- from .embed import PatchEmbed
- from .shape_convert import nchw_to_nlc, nlc_to_nchw
- from .wrappers import resize
- from .tome_presets import tome_presets
- from .registry import MODELS
- from .imagenet_weights import imagenet_weights
- from .benchmark import benchmark
-
- __all__ = [
-     'PatchEmbed', 'nchw_to_nlc', 'nlc_to_nchw', 'resize', 'tome_presets', 'MODELS', 'imagenet_weights', 'benchmark'
- ]

build/lib/segformer_plusplus/utils/benchmark.py DELETED
@@ -1,76 +0,0 @@
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
-
- # Source: https://github.com/facebookresearch/ToMe/blob/main/tome/utils.py
- # --------------------------------------------------------
-
- import time
- from typing import Tuple
-
- import torch
- from tqdm import tqdm
-
-
- def benchmark(
-         model: torch.nn.Module,
-         device: torch.device = 0,
-         input_size: Tuple[int] = (3, 224, 224),
-         batch_size: int = 64,
-         runs: int = 40,
-         throw_out: float = 0.25,
-         use_fp16: bool = False,
-         verbose: bool = False,
- ) -> float:
-     """
-     Benchmark the given model with random inputs at the given batch size.
-
-     Args:
-         - model: the module to benchmark
-         - device: the device to use for benchmarking
-         - input_size: the input size to pass to the model (channels, h, w)
-         - batch_size: the batch size to use for evaluation
-         - runs: the number of total runs to do
-         - throw_out: the percentage of runs to throw out at the start of testing
-         - use_fp16: whether or not to benchmark with float16 and autocast
-         - verbose: whether or not to use tqdm to print progress / print throughput at end
-
-     Returns:
-         - the throughput measured in images / second
-     """
-     if not isinstance(device, torch.device):
-         device = torch.device(device)
-     is_cuda = torch.device(device).type == "cuda"
-
-     model = model.eval().to(device)
-     input = torch.rand(batch_size, *input_size, device=device)
-     if use_fp16:
-         input = input.half()
-
-     warm_up = int(runs * throw_out)
-     total = 0
-     start = time.time()
-
-     with torch.autocast(device.type, enabled=use_fp16):
-         with torch.no_grad():
-             for i in tqdm(range(runs), disable=not verbose, desc="Benchmarking"):
-                 if i == warm_up:
-                     if is_cuda:
-                         torch.cuda.synchronize()
-                     total = 0
-                     start = time.time()
-
-                 model(input)
-                 total += batch_size
-
-     if is_cuda:
-         torch.cuda.synchronize()
-
-     end = time.time()
-     elapsed = end - start
-
-     throughput = total / elapsed
-
-     if verbose:
-         print(f"Throughput: {throughput:.2f} im/s")
-
-     return throughput

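`benchmark` itself is model-agnostic; a minimal sketch (any `nn.Module` works, the conv here is just a stand-in):

import torch
from segformer_plusplus.utils import benchmark

model = torch.nn.Conv2d(3, 19, kernel_size=3, padding=1)
ips = benchmark(model, device='cuda' if torch.cuda.is_available() else 'cpu',
                input_size=(3, 512, 512), batch_size=8, runs=20, verbose=True)
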
build/lib/segformer_plusplus/utils/embed.py DELETED
@@ -1,330 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- import math
- from typing import Sequence
-
- import torch.nn as nn
- import torch.nn.functional as F
- from mmcv.cnn import build_conv_layer, build_norm_layer
- from mmengine.model import BaseModule
- from mmengine.utils import to_2tuple
-
-
- class AdaptivePadding(nn.Module):
-     """Applies padding to input (if needed) so that input can get fully covered
-     by filter you specified. It supports two modes "same" and "corner". The
-     "same" mode is same with "SAME" padding mode in TensorFlow, pad zero around
-     input. The "corner" mode would pad zero to bottom right.
-
-     Args:
-         kernel_size (int | tuple): Size of the kernel.
-         stride (int | tuple): Stride of the filter. Default: 1.
-         dilation (int | tuple): Spacing between kernel elements.
-             Default: 1.
-         padding (str): Support "same" and "corner", "corner" mode
-             would pad zero to bottom right, and "same" mode would
-             pad zero around input. Default: "corner".
-     Example:
-         >>> kernel_size = 16
-         >>> stride = 16
-         >>> dilation = 1
-         >>> input = torch.rand(1, 1, 15, 17)
-         >>> adap_pad = AdaptivePadding(
-         >>>     kernel_size=kernel_size,
-         >>>     stride=stride,
-         >>>     dilation=dilation,
-         >>>     padding="corner")
-         >>> out = adap_pad(input)
-         >>> assert (out.shape[2], out.shape[3]) == (16, 32)
-         >>> input = torch.rand(1, 1, 16, 17)
-         >>> out = adap_pad(input)
-         >>> assert (out.shape[2], out.shape[3]) == (16, 32)
-     """
-
-     def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):
-
-         super().__init__()
-
-         assert padding in ('same', 'corner')
-
-         kernel_size = to_2tuple(kernel_size)
-         stride = to_2tuple(stride)
-         dilation = to_2tuple(dilation)
-
-         self.padding = padding
-         self.kernel_size = kernel_size
-         self.stride = stride
-         self.dilation = dilation
-
-     def get_pad_shape(self, input_shape):
-         input_h, input_w = input_shape
-         kernel_h, kernel_w = self.kernel_size
-         stride_h, stride_w = self.stride
-         output_h = math.ceil(input_h / stride_h)
-         output_w = math.ceil(input_w / stride_w)
-         pad_h = max((output_h - 1) * stride_h +
-                     (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0)
-         pad_w = max((output_w - 1) * stride_w +
-                     (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0)
-         return pad_h, pad_w
-
-     def forward(self, x):
-         pad_h, pad_w = self.get_pad_shape(x.size()[-2:])
-         if pad_h > 0 or pad_w > 0:
-             if self.padding == 'corner':
-                 x = F.pad(x, [0, pad_w, 0, pad_h])
-             elif self.padding == 'same':
-                 x = F.pad(x, [
-                     pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
-                     pad_h - pad_h // 2
-                 ])
-         return x
-
-
- class PatchEmbed(BaseModule):
-     """Image to Patch Embedding.
-
-     We use a conv layer to implement PatchEmbed.
-
-     Args:
-         in_channels (int): The num of input channels. Default: 3
-         embed_dims (int): The dimensions of embedding. Default: 768
-         conv_type (str): The config dict for embedding
-             conv layer type selection. Default: "Conv2d".
-         kernel_size (int): The kernel_size of embedding conv. Default: 16.
-         stride (int, optional): The slide stride of embedding conv.
-             Default: None (Would be set as `kernel_size`).
-         padding (int | tuple | string): The padding length of
-             embedding conv. When it is a string, it means the mode
-             of adaptive padding, support "same" and "corner" now.
-             Default: "corner".
-         dilation (int): The dilation rate of embedding conv. Default: 1.
-         bias (bool): Bias of embed conv. Default: True.
-         norm_cfg (dict, optional): Config dict for normalization layer.
-             Default: None.
-         input_size (int | tuple | None): The size of input, which will be
-             used to calculate the out size. Only work when `dynamic_size`
-             is False. Default: None.
-         init_cfg (`mmengine.ConfigDict`, optional): The Config for
-             initialization. Default: None.
-     """
-
-     def __init__(self,
-                  in_channels=3,
-                  embed_dims=768,
-                  conv_type='Conv2d',
-                  kernel_size=16,
-                  stride=None,
-                  padding='corner',
-                  dilation=1,
-                  bias=True,
-                  norm_cfg=None,
-                  input_size=None,
-                  init_cfg=None):
-         super().__init__(init_cfg=init_cfg)
-
-         self.embed_dims = embed_dims
-         if stride is None:
-             stride = kernel_size
-
-         kernel_size = to_2tuple(kernel_size)
-         stride = to_2tuple(stride)
-         dilation = to_2tuple(dilation)
-
-         if isinstance(padding, str):
-             self.adap_padding = AdaptivePadding(
-                 kernel_size=kernel_size,
-                 stride=stride,
-                 dilation=dilation,
-                 padding=padding)
-             # disable the padding of conv
-             padding = 0
-         else:
-             self.adap_padding = None
-         padding = to_2tuple(padding)
-
-         self.projection = build_conv_layer(
-             dict(type=conv_type),
-             in_channels=in_channels,
-             out_channels=embed_dims,
-             kernel_size=kernel_size,
-             stride=stride,
-             padding=padding,
-             dilation=dilation,
-             bias=bias)
-
-         if norm_cfg is not None:
-             self.norm = build_norm_layer(norm_cfg, embed_dims)[1]
-         else:
-             self.norm = None
-
-         if input_size:
-             input_size = to_2tuple(input_size)
-             # `init_out_size` would be used outside to
-             # calculate the num_patches
-             # when `use_abs_pos_embed` outside
-             self.init_input_size = input_size
-             if self.adap_padding:
-                 pad_h, pad_w = self.adap_padding.get_pad_shape(input_size)
-                 input_h, input_w = input_size
-                 input_h = input_h + pad_h
-                 input_w = input_w + pad_w
-                 input_size = (input_h, input_w)
-
-             # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
-             h_out = (input_size[0] + 2 * padding[0] - dilation[0] *
-                      (kernel_size[0] - 1) - 1) // stride[0] + 1
-             w_out = (input_size[1] + 2 * padding[1] - dilation[1] *
-                      (kernel_size[1] - 1) - 1) // stride[1] + 1
-             self.init_out_size = (h_out, w_out)
-         else:
-             self.init_input_size = None
-             self.init_out_size = None
-
-     def forward(self, x):
-         """
-         Args:
-             x (Tensor): Has shape (B, C, H, W). In most case, C is 3.
-
-         Returns:
-             tuple: Contains merged results and its spatial shape.
-
-             - x (Tensor): Has shape (B, out_h * out_w, embed_dims)
-             - out_size (tuple[int]): Spatial shape of x, arrange as
-               (out_h, out_w).
-         """
-
-         if self.adap_padding:
-             x = self.adap_padding(x)
-
-         x = self.projection(x)
-         out_size = (x.shape[2], x.shape[3])
-         x = x.flatten(2).transpose(1, 2)
-         if self.norm is not None:
-             x = self.norm(x)
-         return x, out_size
-
-
- class PatchMerging(BaseModule):
-     """Merge patch feature map.
-
-     This layer groups feature map by kernel_size, and applies norm and linear
-     layers to the grouped feature map. Our implementation uses `nn.Unfold` to
-     merge patch, which is about 25% faster than original implementation.
-     Instead, we need to modify pretrained models for compatibility.
-
-     Args:
-         in_channels (int): The num of input channels.
-         out_channels (int): The num of output channels.
-         kernel_size (int | tuple, optional): the kernel size in the unfold
-             layer. Defaults to 2.
-         stride (int | tuple, optional): the stride of the sliding blocks in the
-             unfold layer. Default: None. (Would be set as `kernel_size`)
-         padding (int | tuple | string): The padding length of
-             embedding conv. When it is a string, it means the mode
-             of adaptive padding, support "same" and "corner" now.
-             Default: "corner".
-         dilation (int | tuple, optional): dilation parameter in the unfold
-             layer. Default: 1.
-         bias (bool, optional): Whether to add bias in linear layer or not.
-             Defaults: False.
-         norm_cfg (dict, optional): Config dict for normalization layer.
-             Default: dict(type='LN').
-         init_cfg (dict, optional): The extra config for initialization.
-             Default: None.
-     """
-
-     def __init__(self,
-                  in_channels,
-                  out_channels,
-                  kernel_size=2,
-                  stride=None,
-                  padding='corner',
-                  dilation=1,
-                  bias=False,
-                  norm_cfg=dict(type='LN'),
-                  init_cfg=None):
-         super().__init__(init_cfg=init_cfg)
-         self.in_channels = in_channels
-         self.out_channels = out_channels
-         if stride:
-             stride = stride
-         else:
-             stride = kernel_size
-
-         kernel_size = to_2tuple(kernel_size)
-         stride = to_2tuple(stride)
-         dilation = to_2tuple(dilation)
-
-         if isinstance(padding, str):
-             self.adap_padding = AdaptivePadding(
-                 kernel_size=kernel_size,
-                 stride=stride,
-                 dilation=dilation,
-                 padding=padding)
-             # disable the padding of unfold
-             padding = 0
-         else:
-             self.adap_padding = None
-
-         padding = to_2tuple(padding)
-         self.sampler = nn.Unfold(
-             kernel_size=kernel_size,
-             dilation=dilation,
-             padding=padding,
-             stride=stride)
-
-         sample_dim = kernel_size[0] * kernel_size[1] * in_channels
-
-         if norm_cfg is not None:
-             self.norm = build_norm_layer(norm_cfg, sample_dim)[1]
-         else:
-             self.norm = None
-
-         self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)
-
-     def forward(self, x, input_size):
-         """
-         Args:
-             x (Tensor): Has shape (B, H*W, C_in).
-             input_size (tuple[int]): The spatial shape of x, arrange as (H, W).
-                 Default: None.
-
-         Returns:
-             tuple: Contains merged results and its spatial shape.
-
-             - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)
-             - out_size (tuple[int]): Spatial shape of x, arrange as
-               (Merged_H, Merged_W).
-         """
-         B, L, C = x.shape
-         assert isinstance(input_size, Sequence), f'Expect ' \
-                                                  f'input_size is ' \
-                                                  f'`Sequence` ' \
-                                                  f'but get {input_size}'
-
-         H, W = input_size
-         assert L == H * W, 'input feature has wrong size'
-
-         x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W
-         # Use nn.Unfold to merge patch. About 25% faster than original method,
-         # but need to modify pretrained model for compatibility
-
-         if self.adap_padding:
-             x = self.adap_padding(x)
-             H, W = x.shape[-2:]
-
-         x = self.sampler(x)
-         # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2)
-
-         out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *
-                  (self.sampler.kernel_size[0] - 1) -
-                  1) // self.sampler.stride[0] + 1
-         out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *
-                  (self.sampler.kernel_size[1] - 1) -
-                  1) // self.sampler.stride[1] + 1
-
-         output_size = (out_h, out_w)
-         x = x.transpose(1, 2)  # B, H/2*W/2, 4*C
-         x = self.norm(x) if self.norm else x
-         x = self.reduction(x)
-         return x, output_size

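For the overlapped patch embedding, a quick shape check (assuming mmcv/mmengine are installed; these are the stage-1 settings of the b0 config above):

import torch
from segformer_plusplus.utils import PatchEmbed

pe = PatchEmbed(in_channels=3, embed_dims=32, kernel_size=7, stride=4,
                padding='corner', norm_cfg=dict(type='LN'))
x, hw = pe(torch.rand(1, 3, 224, 224))
print(x.shape, hw)  # torch.Size([1, 3136, 32]) (56, 56), since 224 / 4 = 56
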
build/lib/segformer_plusplus/utils/imagenet_weights.py DELETED
@@ -1,8 +0,0 @@
- imagenet_weights = {
-     'b0': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth',
-     'b1': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth',
-     'b2': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth',
-     'b3': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth',
-     'b4': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth',
-     'b5': 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth'
- }

build/lib/segformer_plusplus/utils/registry.py DELETED
@@ -1,6 +0,0 @@
- from mmengine import Registry
-
- MODELS = Registry(
-     'models',
-     locations=['segformer_plusplus.model.backbone', 'segformer_plusplus.model.head']
- )

build/lib/segformer_plusplus/utils/shape_convert.py DELETED
@@ -1,107 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- def nlc_to_nchw(x, hw_shape):
-     """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.
-
-     Args:
-         x (Tensor): The input tensor of shape [N, L, C] before conversion.
-         hw_shape (Sequence[int]): The height and width of output feature map.
-
-     Returns:
-         Tensor: The output tensor of shape [N, C, H, W] after conversion.
-     """
-     H, W = hw_shape
-     assert len(x.shape) == 3
-     B, L, C = x.shape
-     assert L == H * W, 'The seq_len doesn\'t match H, W'
-     return x.transpose(1, 2).reshape(B, C, H, W)
-
-
- def nchw_to_nlc(x):
-     """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.
-
-     Args:
-         x (Tensor): The input tensor of shape [N, C, H, W] before conversion.
-
-     Returns:
-         Tensor: The output tensor of shape [N, L, C] after conversion.
-     """
-     assert len(x.shape) == 4
-     return x.flatten(2).transpose(1, 2).contiguous()
-
-
- def nchw2nlc2nchw(module, x, contiguous=False, **kwargs):
-     """Flatten [N, C, H, W] shape tensor `x` to [N, L, C] shape tensor. Use the
-     reshaped tensor as the input of `module`, and convert the output of
-     `module`, whose shape is [N, L, C], back to [N, C, H, W].
-
-     Args:
-         module (Callable): A callable object that takes a tensor
-             with shape [N, L, C] as input.
-         x (Tensor): The input tensor of shape [N, C, H, W].
-         contiguous (Bool): Whether to make the tensor contiguous
-             after each shape transform.
-
-     Returns:
-         Tensor: The output tensor of shape [N, C, H, W].
-
-     Example:
-         >>> import torch
-         >>> import torch.nn as nn
-         >>> norm = nn.LayerNorm(4)
-         >>> feature_map = torch.rand(4, 4, 5, 5)
-         >>> output = nchw2nlc2nchw(norm, feature_map)
-     """
-     B, C, H, W = x.shape
-     if not contiguous:
-         x = x.flatten(2).transpose(1, 2)
-         x = module(x, **kwargs)
-         x = x.transpose(1, 2).reshape(B, C, H, W)
-     else:
-         x = x.flatten(2).transpose(1, 2).contiguous()
-         x = module(x, **kwargs)
-         x = x.transpose(1, 2).reshape(B, C, H, W).contiguous()
-     return x
-
-
- def nlc2nchw2nlc(module, x, hw_shape, contiguous=False, **kwargs):
-     """Convert [N, L, C] shape tensor `x` to [N, C, H, W] shape tensor. Use the
-     reshaped tensor as the input of `module`, and convert the output of
-     `module`, whose shape is [N, C, H, W], back to [N, L, C].
-
-     Args:
-         module (Callable): A callable object that takes a tensor
-             with shape [N, C, H, W] as input.
-         x (Tensor): The input tensor of shape [N, L, C].
-         hw_shape (Sequence[int]): The height and width of the
-             feature map with shape [N, C, H, W].
-         contiguous (Bool): Whether to make the tensor contiguous
-             after each shape transform.
-
-     Returns:
-         Tensor: The output tensor of shape [N, L, C].
-
-     Example:
-         >>> import torch
-         >>> import torch.nn as nn
-         >>> conv = nn.Conv2d(16, 16, 3, 1, 1)
-         >>> feature_map = torch.rand(4, 25, 16)
-         >>> output = nlc2nchw2nlc(conv, feature_map, (5, 5))
-     """
-     H, W = hw_shape
-     assert len(x.shape) == 3
-     B, L, C = x.shape
-     assert L == H * W, 'The seq_len doesn\'t match H, W'
-     if not contiguous:
-         x = x.transpose(1, 2).reshape(B, C, H, W)
-         x = module(x, **kwargs)
-         x = x.flatten(2).transpose(1, 2)
-     else:
-         x = x.transpose(1, 2).reshape(B, C, H, W).contiguous()
-         x = module(x, **kwargs)
-         x = x.flatten(2).transpose(1, 2).contiguous()
-     return x

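The two conversions are exact inverses whenever `L == H * W`; a quick round-trip check:

import torch
from segformer_plusplus.utils import nchw_to_nlc, nlc_to_nchw

x = torch.rand(2, 64, 32, 48)             # [N, C, H, W]
seq = nchw_to_nlc(x)                      # [2, 1536, 64], with L = 32 * 48
assert torch.equal(nlc_to_nchw(seq, (32, 48)), x)
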
build/lib/segformer_plusplus/utils/tome_presets.py DELETED
@@ -1,20 +0,0 @@
- tome_presets = {
-     'bsm_hq': [
-         dict(q_mode=None, kv_mode='bsm', kv_r=0.6, kv_sx=2, kv_sy=2),
-         dict(q_mode=None, kv_mode='bsm', kv_r=0.6, kv_sx=2, kv_sy=2),
-         dict(q_mode='bsm', kv_mode=None, q_r=0.8, q_sx=4, q_sy=4),
-         dict(q_mode='bsm', kv_mode=None, q_r=0.8, q_sx=4, q_sy=4)
-     ],
-     'bsm_fast': [
-         dict(q_mode=None, kv_mode='bsm_r2D', kv_r=0.9, kv_sx=4, kv_sy=4),
-         dict(q_mode=None, kv_mode='bsm_r2D', kv_r=0.9, kv_sx=4, kv_sy=4),
-         dict(q_mode='bsm_r2D', kv_mode=None, q_r=0.9, q_sx=4, q_sy=4),
-         dict(q_mode='bsm_r2D', kv_mode=None, q_r=0.9, q_sx=4, q_sy=4)
-     ],
-     'n2d_2x2': [
-         dict(q_mode='neighbor_2D', kv_mode=None, q_s=(2, 2)),
-         dict(q_mode='neighbor_2D', kv_mode=None, q_s=(2, 2)),
-         dict(q_mode='neighbor_2D', kv_mode=None, q_s=(2, 2)),
-         dict(q_mode='neighbor_2D', kv_mode=None, q_s=(2, 2))
-     ]
- }

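In `mit.py` above, the fractional `kv_r`/`q_r` values are converted into the absolute token count `r` that `bipartite_soft_matching_random2d` expects; the arithmetic for the 'bsm_hq' preset on a 64x64 KV map:

tokens = 64 * 64          # KV sequence length after spatial reduction
kv_r = 0.6                # from 'bsm_hq'
r = int(tokens * kv_r)    # 2457 of 4096 tokens get merged away

Note that the 'bsm_fast' and 'n2d_2x2' presets use mode names ('bsm_r2D', 'neighbor_2D') that this deleted build copy of mit.py never branches on (it checks 'bsm', 'n2d', 'n1d'); presumably the live package resolves those names elsewhere.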
build/lib/segformer_plusplus/utils/wrappers.py DELETED
@@ -1,51 +0,0 @@
- # Copyright (c) OpenMMLab. All rights reserved.
- import warnings
-
- import torch.nn as nn
- import torch.nn.functional as F
-
-
- def resize(input,
-            size=None,
-            scale_factor=None,
-            mode='nearest',
-            align_corners=None,
-            warning=True):
-     if warning:
-         if size is not None and align_corners:
-             input_h, input_w = tuple(int(x) for x in input.shape[2:])
-             output_h, output_w = tuple(int(x) for x in size)
-             if output_h > input_h or output_w > input_w:
-                 if ((output_h > 1 and output_w > 1 and input_h > 1
-                      and input_w > 1) and (output_h - 1) % (input_h - 1)
-                         and (output_w - 1) % (input_w - 1)):
-                     warnings.warn(
-                         f'When align_corners={align_corners}, '
-                         'the output would be more aligned if '
-                         f'input size {(input_h, input_w)} is `x+1` and '
-                         f'out size {(output_h, output_w)} is `nx+1`')
-     return F.interpolate(input, size, scale_factor, mode, align_corners)
-
-
- class Upsample(nn.Module):
-
-     def __init__(self,
-                  size=None,
-                  scale_factor=None,
-                  mode='nearest',
-                  align_corners=None):
-         super().__init__()
-         self.size = size
-         if isinstance(scale_factor, tuple):
-             self.scale_factor = tuple(float(factor) for factor in scale_factor)
-         else:
-             self.scale_factor = float(scale_factor) if scale_factor else None
-         self.mode = mode
-         self.align_corners = align_corners
-
-     def forward(self, x):
-         if not self.size:
-             size = [int(t * self.scale_factor) for t in x.shape[-2:]]
-         else:
-             size = self.size
-         return resize(x, size, None, self.mode, self.align_corners)

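`resize` is a thin wrapper over `F.interpolate` with an extra align_corners sanity warning; for instance:

import torch
from segformer_plusplus.utils import resize

feat = torch.rand(1, 256, 32, 32)
up = resize(feat, size=(128, 128), mode='bilinear', align_corners=False)  # (1, 256, 128, 128)
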
segformer_plusplus.egg-info/SOURCES.txt CHANGED
@@ -1,7 +1,10 @@
 setup.py
 segformer_plusplus/__init__.py
 segformer_plusplus/build_model.py
+ segformer_plusplus/cityscape_benchmark.py
 segformer_plusplus/random_benchmark.py
+ segformer_plusplus/start_cityscape_benchmark.py
+ segformer_plusplus/start_random_benchmark.py
 segformer_plusplus.egg-info/PKG-INFO
 segformer_plusplus.egg-info/SOURCES.txt
 segformer_plusplus.egg-info/dependency_links.txt
@@ -15,15 +18,22 @@ segformer_plusplus/configs/segformer_mit_b3.py
 segformer_plusplus/configs/segformer_mit_b4.py
 segformer_plusplus/configs/segformer_mit_b5.py
 segformer_plusplus/model/__init__.py
+ segformer_plusplus/model/base_module.py
+ segformer_plusplus/model/utils.py
+ segformer_plusplus/model/weight_init.py
 segformer_plusplus/model/backbone/__init__.py
 segformer_plusplus/model/backbone/mit.py
 segformer_plusplus/model/head/__init__.py
 segformer_plusplus/model/head/segformer_head.py
 segformer_plusplus/utils/__init__.py
+ segformer_plusplus/utils/activation.py
 segformer_plusplus/utils/benchmark.py
+ segformer_plusplus/utils/build_functions.py
 segformer_plusplus/utils/embed.py
 segformer_plusplus/utils/imagenet_weights.py
+ segformer_plusplus/utils/manager.py
 segformer_plusplus/utils/registry.py
 segformer_plusplus/utils/shape_convert.py
 segformer_plusplus/utils/tome_presets.py
+ segformer_plusplus/utils/version_utils.py
 segformer_plusplus/utils/wrappers.py
segformer_plusplus.egg-info/requires.txt CHANGED
@@ -1,2 +1,5 @@
+ numpy
+ omegaconf
+ pyyaml
 tomesd
 torch>=2.0.1
segformer_plusplus/cityscape/berlin_000543_000019_leftImg8bit.png ADDED

Git LFS Details

  • SHA256: 3d616adab2c462fdee7f47a2de927436aebfb73843c38c3e3fbc85a6220955d4
  • Pointer size: 132 Bytes
  • Size of remote file: 2.37 MB
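
With the .gitattributes rules added above, the repository itself now stores only a small LFS pointer for this image, while the 2.37 MB blob lives in LFS storage. Per the LFS v1 spec, the pointer contents are (the exact byte count is not shown in this view, so it is left as a placeholder):

version https://git-lfs.github.com/spec/v1
oid sha256:3d616adab2c462fdee7f47a2de927436aebfb73843c38c3e3fbc85a6220955d4
size <exact byte count, ~2.37 MB>
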
segformer_plusplus/cityscape_benchmark.py CHANGED
@@ -14,6 +14,8 @@ print(f"Using device: {device}")
 if device.type == 'cuda':
     print(f"CUDA Device Name: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
+torch.manual_seed(42)
+torch.cuda.manual_seed_all(42)
 
 def cityscape_benchmark(
     model: torch.nn.Module,
@@ -96,17 +98,7 @@ def cityscape_benchmark(
 
     if save_output:
         with torch.no_grad():
-            with open("model_output_log.txt", "w") as f:
-                f.write("=== Model Input Info ===\n")
-                f.write(f"Input tensor:\n{img_tensor}\n")
-                f.write(f"Input shape: {img_tensor.shape}\n")
-                f.write(f"Input stats: mean = {img_tensor.mean().item()}, std = {img_tensor.std().item()}\n\n")
-
-                output = model(img_tensor)
-
-                f.write("=== Raw Model Output ===\n")
-                f.write(f"{output}\n\n")
-
+            output = model(img_tensor)
         pred = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
 
         # Save the prediction as a text file
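The fixed seeds above make the no-checkpoint reference prediction repeatable, but seeding alone does not guarantee bit-exact CUDA inference on every setup. A minimal sketch of the stricter switches PyTorch offers (not part of this commit, and they can cost speed):

import torch

torch.manual_seed(42)                      # seed the CPU RNG
torch.cuda.manual_seed_all(42)             # seed every visible GPU RNG
torch.backends.cudnn.deterministic = True  # select deterministic cuDNN kernels
torch.backends.cudnn.benchmark = False     # disable non-deterministic autotuning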
cityscapes_prediction_output_reference.txt → segformer_plusplus/cityscapes_prediction_output.txt RENAMED
File without changes
segformer_plusplus/cityscapes_prediction_output_reference_b05_nocheckpoint.txt ADDED
The diff for this file is too large to render. See raw diff
 
segformer_plusplus/config.json ADDED
@@ -0,0 +1,10 @@
+{
+  "model_type": "segformerplusplus",
+  "architectures": ["SegFormerPlusPlus"],
+  "backbone": "b5",
+  "supported_backbones": ["b0", "b1", "b2", "b3", "b4", "b5"],
+  "head": "bsm_hq",
+  "supported_heads": ["bsm_hq", "bsm_fast", "n2d_2x2"],
+  "out_channels": 19,
+  "num_labels": 19
+}
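For orientation, a minimal sketch of consuming this config from Python; the relative path and the key lookups simply mirror the schema added above:

import json

with open('segformer_plusplus/config.json') as f:
    cfg = json.load(f)

assert cfg['backbone'] in cfg['supported_backbones']
assert cfg['head'] in cfg['supported_heads']
print(cfg['model_type'], cfg['num_labels'])  # segformerplusplus 19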
segformer_plusplus/configs/config/utils.py CHANGED
@@ -10,11 +10,17 @@ from importlib import import_module as real_import_module
 import json
 import pickle
 from pathlib import Path
-from mim.utils import package2module
+import itertools
 
 import yaml
 from omegaconf import OmegaConf
 
+from pkg_resources.extern import packaging
+__import__('pkg_resources.extern.packaging.version')
+__import__('pkg_resources.extern.packaging.specifiers')
+__import__('pkg_resources.extern.packaging.requirements')
+__import__('pkg_resources.extern.packaging.markers')
+
 
 PYTHON_ROOT_DIR = osp.dirname(osp.dirname(sys.executable))
 SYSTEM_PYTHON_PREFIX = '/usr/lib/python'
@@ -644,4 +650,132 @@ def dump(obj, file=None, file_format=None, **kwargs):
 
 def check_file_exist(filename, msg_tmpl='file "{}" does not exist'):
     if not osp.isfile(filename):
-        raise FileNotFoundError(msg_tmpl.format(filename))
+        raise FileNotFoundError(msg_tmpl.format(filename))
+
+
+def package2module(package: str):
+    """Infer module name from package.
+
+    Args:
+        package (str): Package to infer module name.
+    """
+    pkg = get_distribution(package)
+    if pkg.has_metadata('top_level.txt'):
+        module_name = pkg.get_metadata('top_level.txt').split('\n')[0]
+        return module_name
+    else:
+        raise ValueError(
+            highlighted_error(f'can not infer the module name of {package}'))
+
+
+def get_distribution(dist):
+    """Return the installed distribution for a Requirement or string."""
+    # Resolve via pkg_resources' working set so that package2module can read
+    # the distribution's top_level.txt metadata (a bare Requirement has none).
+    import pkg_resources
+    if isinstance(dist, str):
+        dist = Requirement.parse(dist)
+    return pkg_resources.get_distribution(dist.project_name)
+
+
+def highlighted_error(msg: Union[str, Exception]) -> str:
+    return click.style(msg, fg='red', bold=True)  # type: ignore
+
+
+class Requirement(packaging.requirements.Requirement):
+    def __init__(self, requirement_string):
+        """DO NOT CALL THIS UNDOCUMENTED METHOD; use Requirement.parse()!"""
+        super(Requirement, self).__init__(requirement_string)
+        self.unsafe_name = self.name
+        project_name = safe_name(self.name)
+        self.project_name, self.key = project_name, project_name.lower()
+        self.specs = [
+            (spec.operator, spec.version) for spec in self.specifier]
+        self.extras = tuple(map(safe_extra, self.extras))
+        self.hashCmp = (
+            self.key,
+            self.url,
+            self.specifier,
+            frozenset(self.extras),
+            str(self.marker) if self.marker else None,
+        )
+        self.__hash = hash(self.hashCmp)
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, Requirement) and
+            self.hashCmp == other.hashCmp
+        )
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __contains__(self, item):
+        if item.key != self.key:
+            return False
+
+        item = item.version
+
+        # Allow prereleases always in order to match the previous behavior of
+        # this method. In the future this should be smarter and follow PEP 440
+        # more accurately.
+        return self.specifier.contains(item, prereleases=True)
+
+    def __hash__(self):
+        return self.__hash
+
+    def __repr__(self):
+        return "Requirement.parse(%r)" % str(self)
+
+    @staticmethod
+    def parse(s):
+        req, = parse_requirements(s)
+        return req
+
+
+def parse_requirements(strs):
+    """Yield ``Requirement`` objects for each specification in `strs`
+
+    `strs` must be a string, or a (possibly-nested) iterable thereof.
+    """
+    # create a steppable iterator, so we can handle \-continuations
+    lines = iter(yield_lines(strs))
+
+    for line in lines:
+        # Drop comments -- a hash without a space may be in a URL.
+        if ' #' in line:
+            line = line[:line.find(' #')]
+        # If there is a line continuation, drop it, and append the next line.
+        if line.endswith('\\'):
+            line = line[:-2].strip()
+            try:
+                line += next(lines)
+            except StopIteration:
+                return
+        yield Requirement(line)
+
+
+def yield_lines(strs):
+    """Yield non-blank lines of a string, or of a (nested) iterable of strings."""
+    # Base case: a single string yields its stripped, non-empty lines; without
+    # it the generic branch below would recurse on each character forever.
+    if isinstance(strs, str):
+        return (line.strip() for line in strs.splitlines() if line.strip())
+    return itertools.chain.from_iterable(map(yield_lines, strs))
+
+
+def safe_extra(extra):
+    """Convert an arbitrary string to a standard 'extra' name
+
+    Any runs of non-alphanumeric characters are replaced with a single '_',
+    and the result is always lowercased.
+    """
+    return re.sub('[^A-Za-z0-9.-]+', '_', extra).lower()
+
+
+def safe_name(name):
+    """Convert an arbitrary string to a standard distribution name
+
+    Any runs of non-alphanumeric/. characters are replaced with a single '-'.
+    """
+    return re.sub('[^A-Za-z0-9.]+', '-', name)
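These helpers vendor just enough of mim.utils and pkg_resources to drop the mim dependency. A hedged usage sketch, assuming the queried package is installed and ships top_level.txt metadata:

# Parse a requirement string into the vendored Requirement class.
req = Requirement.parse('torch>=2.0.1')
print(req.project_name, req.specs)  # torch [('>=', '2.0.1')]

# Infer the importable module name of an installed distribution.
print(package2module('segformer_plusplus'))  # e.g. 'segformer_plusplus'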
segformer_plusplus/modeling_segformer_plusplus.py ADDED
@@ -0,0 +1,69 @@
+# modeling_segformer_plusplus.py
+
+from typing import Optional, Tuple
+import torch
+import torch.nn as nn
+from transformers import PreTrainedModel, PretrainedConfig
+from transformers.modeling_outputs import SemanticSegmenterOutput
+
+# If you want to import SegFormer directly, you have to make sure
+# that this class is available in the same repo.
+from segformer_plusplus.build_model import create_model
+
+
+class SegformerPlusPlusConfig(PretrainedConfig):
+    model_type = "segformer_plusplus"
+
+    def __init__(
+        self,
+        backbone: str = "b5",
+        tome_strategy: Optional[str] = "bsm_hq",
+        num_labels: int = 19,
+        id2label: Optional[dict] = None,
+        label2id: Optional[dict] = None,
+        **kwargs,
+    ):
+        self.backbone = backbone
+        self.tome_strategy = tome_strategy
+        self.num_labels = num_labels
+
+        if id2label is None:
+            id2label = {i: f"class_{i}" for i in range(num_labels)}
+        if label2id is None:
+            label2id = {v: k for k, v in id2label.items()}
+
+        self.id2label = id2label
+        self.label2id = label2id
+
+        super().__init__(**kwargs)
+
+
+class SegformerPlusPlusForSemanticSegmentation(PreTrainedModel):
+    config_class = SegformerPlusPlusConfig
+
+    def __init__(self, config: SegformerPlusPlusConfig):
+        super().__init__(config)
+        self.segformer = create_model(
+            backbone=config.backbone,
+            tome_strategy=config.tome_strategy,
+            out_channels=config.num_labels,
+            pretrained=False,  # no pretrained weights here; they are loaded via .from_pretrained
+        )
+
+    def forward(
+        self,
+        pixel_values: torch.FloatTensor,
+        labels: Optional[torch.LongTensor] = None,
+    ) -> SemanticSegmenterOutput:
+
+        logits = self.segformer(pixel_values)
+
+        loss = None
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss(ignore_index=255)
+            loss = loss_fct(logits, labels.long())
+
+        return SemanticSegmenterOutput(
+            loss=loss,
+            logits=logits,
+        )
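A minimal usage sketch for this wrapper; the dummy input shape is arbitrary, and loading the committed weights would go through .from_pretrained with config.json and pytorch_model.bin sitting next to this file:

import torch
from segformer_plusplus.modeling_segformer_plusplus import (
    SegformerPlusPlusConfig,
    SegformerPlusPlusForSemanticSegmentation,
)

config = SegformerPlusPlusConfig(backbone='b5', tome_strategy='bsm_hq', num_labels=19)
model = SegformerPlusPlusForSemanticSegmentation(config).eval()

pixel_values = torch.randn(1, 3, 512, 512)  # dummy batch of one RGB image
with torch.no_grad():
    out = model(pixel_values)
print(out.logits.shape)  # (1, 19, h, w); spatial size depends on the head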
segformer_plusplus/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e064fa2fb7d618208c2542e76c543b7cb552a3d8997a0c6c4cc0a14da86ba58
+size 328287002
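The .bin is stored as a Git LFS pointer, so a plain clone needs a `git lfs pull` before the file is usable. A hedged sketch of loading it directly, assuming the state-dict keys match the wrapper from the previous sketch:

import torch

state_dict = torch.load('segformer_plusplus/pytorch_model.bin', map_location='cpu')
# 'model' is the SegformerPlusPlusForSemanticSegmentation instance from above.
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print(len(missing), len(unexpected))  # sanity-check the key overlap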
segformer_plusplus/start_cityscape_benchmark.py CHANGED
@@ -7,7 +7,7 @@ from .build_model import create_model
 from .cityscape_benchmark import cityscape_benchmark
 
 parser = argparse.ArgumentParser(description="Segformer Benchmarking Script")
-parser.add_argument('--backbone', type=str, default='b0', choices=['b0', 'b1', 'b2', 'b3', 'b4', 'b5'], help='Model backbone version')
+parser.add_argument('--backbone', type=str, default='b5', choices=['b0', 'b1', 'b2', 'b3', 'b4', 'b5'], help='Model backbone version')
 parser.add_argument('--head', type=str, default='bsm_hq', choices=['bsm_hq', 'bsm_fast', 'n2d_2x2'], help='Model head type')
 parser.add_argument('--checkpoint', type=str, default=None, help='Path to .pth checkpoint file (optional)')
 args = parser.parse_args()
@@ -22,13 +22,15 @@ if args.checkpoint:
 else:
     print("No checkpoint provided – using model as initialized.")
 
-image_path = os.path.expanduser('~/SegformerPlusPlus/mmsegmentation/data/cityscapes/leftImg8bit/test/berlin/berlin_000543_000019_leftImg8bit.png')
+cwd = os.getcwd()
+
+image_path = os.path.join(cwd, 'cityscape', 'berlin_000543_000019_leftImg8bit.png')
 result = cityscape_benchmark(model, image_path)
 
 print("Cityscapes Benchmark Results:", result)
 
-reference_txt_path = os.path.expanduser('~/SegformerPlusPlus/model/cityscapes_prediction_output_reference.txt')
-generated_txt_path = os.path.expanduser('~/SegformerPlusPlus/model/cityscapes_prediction_output.txt')
+reference_txt_path = os.path.join(cwd, 'cityscapes_prediction_output_reference_b05_nocheckpoint.txt')
+generated_txt_path = os.path.join(cwd, 'cityscapes_prediction_output.txt')
 
 if os.path.exists(reference_txt_path) and os.path.exists(generated_txt_path):
     ref_arr = np.loadtxt(reference_txt_path, dtype=int)
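Because the script now resolves everything against os.getcwd(), it has to be launched from the segformer_plusplus/ directory so that the test image and the reference file are found. The comparison it then performs boils down to this sketch (both files are whitespace-separated integer class-id grids):

import numpy as np

ref_arr = np.loadtxt('cityscapes_prediction_output_reference_b05_nocheckpoint.txt', dtype=int)
gen_arr = np.loadtxt('cityscapes_prediction_output.txt', dtype=int)
print('pixel agreement:', (ref_arr == gen_arr).mean())  # 1.0 means identical predictions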
setup.py CHANGED
@@ -5,7 +5,18 @@ setup(
     version="0.2",
     author="Marco Kantonis",
     description="Segformer++: Efficient Token-Merging Strategies for High-Resolution Semantic Segmentation",
-    install_requires=['torch>=2.0.1', 'tomesd','omegaconf', 'pyyaml'],
+    install_requires=[
+        'torch>=2.0.1',
+        'tomesd',
+        'omegaconf',
+        'pyyaml',
+        'numpy',
+        'rich',
+        'yapf',
+        'addict',
+        'tqdm',
+        'packaging'
+    ],
     packages=find_packages(),
     license='MIT',
     long_description="https://arxiv.org/abs/2405.14467"