Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

ersvr/models/ersvr.py +49 -0
ersvr/models/feature_alignment.py +24 -0
ersvr/models/mbd.py +28 -0
ersvr/models/sr_network.py +44 -0
ersvr/models/student.py +59 -0
ersvr/models/upsampling.py +33 -0

ersvr/models/ersvr.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from .feature_alignment import FeatureAlignmentBlock
+from .sr_network import SRNetwork
class ERSVR(nn.Module):
    """Real-time video super-resolution network using multi-branch dilated convolutions.

    Three consecutive RGB frames are stacked along the channel axis, passed
    through a ``FeatureAlignmentBlock`` and then an ``SRNetwork``. The
    bicubically upsampled center frame is handed to the ``SRNetwork``, which
    adds it to its prediction as a residual base.

    Args:
        scale_factor: Spatial upscaling factor. Only ``4`` is supported,
            because the ``SRNetwork`` upsampling stage is a fixed 4x block.

    Raises:
        ValueError: If ``scale_factor`` is not ``4``.
    """

    # Upscaling factor implemented by the SR network's upsampling stage.
    _SUPPORTED_SCALE = 4
    # Expected input layout: 3 frames of 3 (RGB) channels each.
    _NUM_FRAMES = 3
    _FRAME_CHANNELS = 3

    def __init__(self, scale_factor=4):
        super(ERSVR, self).__init__()
        # Fail fast: any other factor would make the bicubic residual and the
        # network output disagree in size and crash inside forward().
        if scale_factor != self._SUPPORTED_SCALE:
            raise ValueError(
                f"ERSVR only supports scale_factor={self._SUPPORTED_SCALE}, "
                f"got {scale_factor!r}"
            )
        self.scale_factor = scale_factor
        # Feature alignment block operating on the channel-stacked frames
        self.feature_alignment = FeatureAlignmentBlock(
            in_channels=self._NUM_FRAMES * self._FRAME_CHANNELS,
            out_channels=64,
        )
        # SR network
        self.sr_network = SRNetwork(in_channels=64, out_channels=3)

    def forward(self, x):
        """Super-resolve the center frame of a 3-frame clip.

        Args:
            x: Tensor of shape ``(B, 3, 3, H, W)`` - batch of 3 RGB frames.

        Returns:
            Tensor of shape ``(B, 3, H * scale_factor, W * scale_factor)``.

        Raises:
            ValueError: If ``x`` does not have the expected layout, or if the
                network output and the bicubic residual differ in shape.
        """
        expected = (self._NUM_FRAMES, self._FRAME_CHANNELS)
        if x.dim() != 5 or tuple(x.shape[1:3]) != expected:
            raise ValueError(
                f"Expected input of shape (B, 3, 3, H, W), got {tuple(x.shape)}"
            )
        batch_size, num_frames, channels, height, width = x.shape

        # Center frame (middle index along the frame axis) for the residual
        center_frame = x[:, num_frames // 2]
        # Bicubic upsampling of center frame for residual connection
        bicubic = F.interpolate(
            center_frame,
            scale_factor=self.scale_factor,
            mode='bicubic',
            align_corners=False
        )

        # Stack frames along channels: (B, 3, 3, H, W) -> (B, 9, H, W)
        stacked = x.reshape(batch_size, num_frames * channels, height, width)
        features = self.feature_alignment(stacked)
        output = self.sr_network(features, bicubic)

        # Invariant guard: the result must match the bicubic residual's shape
        if output.shape != bicubic.shape:
            raise ValueError(
                "Output and bicubic tensors must have the same dimensions "
                f"(output: {tuple(output.shape)}, bicubic: {tuple(bicubic.shape)})"
            )
        return output

ersvr/models/feature_alignment.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import torch.nn as nn
+from .mbd import MBDModule
class FeatureAlignmentBlock(nn.Module):
    """Feature extractor applied to the channel-concatenated input frames.

    A stack of three 3x3 convolution + ReLU pairs followed by an ``MBDModule``.
    """

    def __init__(self, in_channels=9, out_channels=64):
        super(FeatureAlignmentBlock, self).__init__()
        stem = []
        width_in = in_channels
        # Three conv/ReLU pairs; only the first one changes the channel count.
        for _ in range(3):
            stem.append(nn.Conv2d(width_in, out_channels, kernel_size=3, padding=1))
            stem.append(nn.ReLU(inplace=True))
            width_in = out_channels
        self.conv_layers = nn.Sequential(*stem)
        self.mbd = MBDModule(out_channels, out_channels)

    def forward(self, x):
        """Map concatenated frames ``(B, in_channels, H, W)`` to features."""
        return self.mbd(self.conv_layers(x))

ersvr/models/mbd.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import torch
+import torch.nn as nn
class MBDModule(nn.Module):
    """Multi-branch dilated convolution module.

    A 1x1 projection feeds three parallel 3x3 convolutions with dilation
    rates 1, 2 and 4; their outputs are concatenated along the channel axis
    and merged back to ``out_channels`` by a 1x1 fusion convolution.
    """

    def __init__(self, in_channels, out_channels):
        super(MBDModule, self).__init__()
        rates = (1, 2, 4)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)
        branches = []
        for rate in rates:
            # padding == dilation keeps the spatial size for a 3x3 kernel
            branches.append(
                nn.Conv2d(
                    out_channels,
                    out_channels,
                    kernel_size=3,
                    padding=rate,
                    dilation=rate,
                )
            )
        self.dilated_convs = nn.ModuleList(branches)
        self.fusion = nn.Conv2d(out_channels * len(rates), out_channels, kernel_size=1)

    def forward(self, x):
        """Run all dilated branches on the projected input and fuse them."""
        projected = self.pointwise(x)
        stacked = torch.cat(
            [branch(projected) for branch in self.dilated_convs], dim=1
        )
        return self.fusion(stacked)

ersvr/models/sr_network.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import torch
+import torch.nn as nn
+from .upsampling import UpsamplingBlock
class SRNetwork(nn.Module):
    """Super Resolution Network with ESPCN-like backbone.

    Nine 3x3 convolution + ReLU pairs at 64 channels, followed by an
    ``UpsamplingBlock`` and a final 3x3 convolution whose output is added to
    a pre-upsampled residual image.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count of the produced image.
    """

    def __init__(self, in_channels=64, out_channels=3):
        super(SRNetwork, self).__init__()
        hidden_channels = 64
        num_conv_layers = 9
        layers = []
        for index in range(num_conv_layers):
            # Only the first convolution adapts from the input channel count.
            conv_in = in_channels if index == 0 else hidden_channels
            layers.append(
                nn.Conv2d(conv_in, hidden_channels, kernel_size=3, padding=1)
            )
            layers.append(nn.ReLU(inplace=True))
        self.conv_layers = nn.Sequential(*layers)
        self.upsampling = UpsamplingBlock(hidden_channels)
        self.final_conv = nn.Conv2d(
            hidden_channels, out_channels, kernel_size=3, padding=1
        )

    def forward(self, x, bicubic):
        """Predict a residual image from ``x`` and add it to ``bicubic``.

        Args:
            x: Feature map of shape ``(B, in_channels, H, W)``.
            bicubic: Residual base image; must be broadcast-compatible with
                the output of ``final_conv``.

        Returns:
            The sum of the predicted residual and ``bicubic``.
        """
        x = self.conv_layers(x)
        x = self.upsampling(x)
        x = self.final_conv(x)
        return x + bicubic

ersvr/models/student.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import torch.nn as nn
class DepthwiseSeparableConv(nn.Module):
    """
    Depthwise separable convolution block.

    Applies a per-channel (depthwise) convolution, then a 1x1 (pointwise)
    convolution, then batch normalization and ReLU. Neither convolution has
    a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding=1):
        super().__init__()
        # groups == in_channels makes the convolution act on each channel alone
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=in_channels,
            bias=False,
        )
        self.pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Run depthwise -> pointwise -> batch norm -> ReLU on ``x``."""
        mixed = self.pointwise(self.depthwise(x))
        return self.relu(self.bn(mixed))
class StudentSRNet(nn.Module):
    """
    Ultra-lightweight Student Model for Video Super-Resolution.
    - Input: (B, 3, 3, H, W)  # 3 frames, 3 channels each
    - Output: (B, 3, H*4, W*4)  # Super-resolved center frame
    Designed for real-time, mobile/edge deployment.

    Args:
        scale_factor: Spatial upscaling factor. Only ``4`` is supported: the
            network contains exactly two 2x PixelShuffle stages.

    Raises:
        ValueError: If ``scale_factor`` is not ``4``.
    """

    def __init__(self, scale_factor=4):
        super().__init__()
        # The layers below are a fixed 4x pipeline; reject other factors
        # instead of silently returning a 4x result for them.
        if scale_factor != 4:
            raise ValueError(
                f"StudentSRNet only supports scale_factor=4, got {scale_factor!r}"
            )
        self.scale_factor = scale_factor
        self.input_conv = nn.Conv2d(9, 16, 3, padding=1)
        self.block1 = DepthwiseSeparableConv(16, 32)
        self.block2 = DepthwiseSeparableConv(32, 32)
        self.block3 = DepthwiseSeparableConv(32, 16)
        # Each stage: 16 -> 64 channels, then PixelShuffle(2) -> 16 channels at 2x size
        self.upsample1 = nn.Sequential(
            nn.Conv2d(16, 64, 3, padding=1),
            nn.PixelShuffle(2),
            nn.ReLU(inplace=True)
        )
        self.upsample2 = nn.Sequential(
            nn.Conv2d(16, 64, 3, padding=1),
            nn.PixelShuffle(2),
            nn.ReLU(inplace=True)
        )
        self.output_conv = nn.Conv2d(16, 3, 3, padding=1)

    def forward(self, x):
        """Super-resolve a clip of frames.

        Args:
            x: Tensor of shape ``(B, 3, 3, H, W)``.

        Returns:
            Tensor of shape ``(B, 3, H*4, W*4)``.
        """
        # x: (B, 3, 3, H, W) -> (B, 9, H, W)
        b, n, c, h, w = x.shape
        x = x.reshape(b, n * c, h, w)
        x = self.input_conv(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.upsample1(x)
        x = self.upsample2(x)
        x = self.output_conv(x)
        return x

ersvr/models/upsampling.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import torch.nn as nn
class SubpixelUpsampling(nn.Module):
    """Sub-pixel upsampling: a 3x3 convolution followed by ``PixelShuffle``.

    The convolution multiplies the channel count by ``scale_factor ** 2`` so
    that the pixel shuffle returns ``in_channels`` channels at the enlarged
    spatial size.
    """

    def __init__(self, in_channels, scale_factor=2):
        super(SubpixelUpsampling, self).__init__()
        self.scale_factor = scale_factor
        expanded_channels = in_channels * (scale_factor ** 2)
        self.conv = nn.Conv2d(in_channels, expanded_channels, kernel_size=3, padding=1)
        self.pixel_shuffle = nn.PixelShuffle(scale_factor)

    def forward(self, x):
        """Expand channels with the convolution, then shuffle them into space."""
        return self.pixel_shuffle(self.conv(x))
class UpsamplingBlock(nn.Module):
    """4x upsampling built from two chained ``SubpixelUpsampling`` stages."""

    def __init__(self, in_channels):
        super(UpsamplingBlock, self).__init__()
        self.upsample1 = SubpixelUpsampling(in_channels)
        self.upsample2 = SubpixelUpsampling(in_channels)

    def forward(self, x):
        """Apply the first and then the second upsampling stage to ``x``."""
        for stage in (self.upsample1, self.upsample2):
            x = stage(x)
        return x