nevreal committed on
Commit 3d3516b · verified · 1 Parent(s): f728a0f

Delete uvr5_pack

Files changed (44)
  1. uvr5_pack/__pycache__/utils.cpython-39.pyc +0 -0
  2. uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc +0 -0
  3. uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc +0 -0
  4. uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc +0 -0
  5. uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc +0 -0
  6. uvr5_pack/lib_v5/dataset.py +0 -170
  7. uvr5_pack/lib_v5/layers.py +0 -116
  8. uvr5_pack/lib_v5/layers_123812KB .py +0 -116
  9. uvr5_pack/lib_v5/layers_123821KB.py +0 -116
  10. uvr5_pack/lib_v5/layers_33966KB.py +0 -122
  11. uvr5_pack/lib_v5/layers_537227KB.py +0 -122
  12. uvr5_pack/lib_v5/layers_537238KB.py +0 -122
  13. uvr5_pack/lib_v5/model_param_init.py +0 -60
  14. uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json +0 -19
  15. uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json +0 -19
  16. uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json +0 -19
  17. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json +0 -19
  18. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json +0 -19
  19. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json +0 -19
  20. uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512_cut.json +0 -19
  21. uvr5_pack/lib_v5/modelparams/2band_32000.json +0 -30
  22. uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json +0 -30
  23. uvr5_pack/lib_v5/modelparams/2band_48000.json +0 -30
  24. uvr5_pack/lib_v5/modelparams/3band_44100.json +0 -42
  25. uvr5_pack/lib_v5/modelparams/3band_44100_mid.json +0 -43
  26. uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json +0 -43
  27. uvr5_pack/lib_v5/modelparams/4band_44100.json +0 -54
  28. uvr5_pack/lib_v5/modelparams/4band_44100_mid.json +0 -55
  29. uvr5_pack/lib_v5/modelparams/4band_44100_msb.json +0 -55
  30. uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json +0 -55
  31. uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json +0 -55
  32. uvr5_pack/lib_v5/modelparams/4band_44100_sw.json +0 -55
  33. uvr5_pack/lib_v5/modelparams/4band_v2.json +0 -54
  34. uvr5_pack/lib_v5/modelparams/4band_v2_sn.json +0 -55
  35. uvr5_pack/lib_v5/modelparams/ensemble.json +0 -43
  36. uvr5_pack/lib_v5/nets.py +0 -113
  37. uvr5_pack/lib_v5/nets_123812KB.py +0 -112
  38. uvr5_pack/lib_v5/nets_123821KB.py +0 -112
  39. uvr5_pack/lib_v5/nets_33966KB.py +0 -112
  40. uvr5_pack/lib_v5/nets_537227KB.py +0 -113
  41. uvr5_pack/lib_v5/nets_537238KB.py +0 -113
  42. uvr5_pack/lib_v5/nets_61968KB.py +0 -112
  43. uvr5_pack/lib_v5/spec_utils.py +0 -485
  44. uvr5_pack/utils.py +0 -242
uvr5_pack/__pycache__/utils.cpython-39.pyc DELETED
Binary file (6.87 kB)
 
uvr5_pack/lib_v5/__pycache__/layers_123821KB.cpython-39.pyc DELETED
Binary file (4.14 kB)
 
uvr5_pack/lib_v5/__pycache__/model_param_init.cpython-39.pyc DELETED
Binary file (1.63 kB)
 
uvr5_pack/lib_v5/__pycache__/nets_61968KB.cpython-39.pyc DELETED
Binary file (3.46 kB)
 
uvr5_pack/lib_v5/__pycache__/spec_utils.cpython-39.pyc DELETED
Binary file (13.3 kB)
 
uvr5_pack/lib_v5/dataset.py DELETED
@@ -1,170 +0,0 @@
import os
import random

import numpy as np
import torch
import torch.utils.data
from tqdm import tqdm

from uvr5_pack.lib_v5 import spec_utils


class VocalRemoverValidationSet(torch.utils.data.Dataset):

    def __init__(self, patch_list):
        self.patch_list = patch_list

    def __len__(self):
        return len(self.patch_list)

    def __getitem__(self, idx):
        path = self.patch_list[idx]
        data = np.load(path)

        X, y = data['X'], data['y']

        X_mag = np.abs(X)
        y_mag = np.abs(y)

        return X_mag, y_mag


def make_pair(mix_dir, inst_dir):
    input_exts = ['.wav', '.m4a', '.mp3', '.mp4', '.flac']

    X_list = sorted([
        os.path.join(mix_dir, fname)
        for fname in os.listdir(mix_dir)
        if os.path.splitext(fname)[1] in input_exts])
    y_list = sorted([
        os.path.join(inst_dir, fname)
        for fname in os.listdir(inst_dir)
        if os.path.splitext(fname)[1] in input_exts])

    filelist = list(zip(X_list, y_list))

    return filelist


def train_val_split(dataset_dir, split_mode, val_rate, val_filelist):
    if split_mode == 'random':
        filelist = make_pair(
            os.path.join(dataset_dir, 'mixtures'),
            os.path.join(dataset_dir, 'instruments'))

        random.shuffle(filelist)

        if len(val_filelist) == 0:
            val_size = int(len(filelist) * val_rate)
            train_filelist = filelist[:-val_size]
            val_filelist = filelist[-val_size:]
        else:
            train_filelist = [
                pair for pair in filelist
                if list(pair) not in val_filelist]
    elif split_mode == 'subdirs':
        if len(val_filelist) != 0:
            raise ValueError('The `val_filelist` option is not available in `subdirs` mode')

        train_filelist = make_pair(
            os.path.join(dataset_dir, 'training/mixtures'),
            os.path.join(dataset_dir, 'training/instruments'))

        val_filelist = make_pair(
            os.path.join(dataset_dir, 'validation/mixtures'),
            os.path.join(dataset_dir, 'validation/instruments'))

    return train_filelist, val_filelist


def augment(X, y, reduction_rate, reduction_mask, mixup_rate, mixup_alpha):
    perm = np.random.permutation(len(X))
    for i, idx in enumerate(tqdm(perm)):
        if np.random.uniform() < reduction_rate:
            y[idx] = spec_utils.reduce_vocal_aggressively(X[idx], y[idx], reduction_mask)

        if np.random.uniform() < 0.5:
            # swap channel
            X[idx] = X[idx, ::-1]
            y[idx] = y[idx, ::-1]
        if np.random.uniform() < 0.02:
            # mono
            X[idx] = X[idx].mean(axis=0, keepdims=True)
            y[idx] = y[idx].mean(axis=0, keepdims=True)
        if np.random.uniform() < 0.02:
            # inst
            X[idx] = y[idx]

        if np.random.uniform() < mixup_rate and i < len(perm) - 1:
            lam = np.random.beta(mixup_alpha, mixup_alpha)
            X[idx] = lam * X[idx] + (1 - lam) * X[perm[i + 1]]
            y[idx] = lam * y[idx] + (1 - lam) * y[perm[i + 1]]

    return X, y


def make_padding(width, cropsize, offset):
    left = offset
    roi_size = cropsize - left * 2
    if roi_size == 0:
        roi_size = cropsize
    right = roi_size - (width % roi_size) + left

    return left, right, roi_size


def make_training_set(filelist, cropsize, patches, sr, hop_length, n_fft, offset):
    len_dataset = patches * len(filelist)

    X_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)
    y_dataset = np.zeros(
        (len_dataset, 2, n_fft // 2 + 1, cropsize), dtype=np.complex64)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        starts = np.random.randint(0, X_pad.shape[2] - cropsize, patches)
        ends = starts + cropsize
        for j in range(patches):
            idx = i * patches + j
            X_dataset[idx] = X_pad[:, :, starts[j]:ends[j]]
            y_dataset[idx] = y_pad[:, :, starts[j]:ends[j]]

    return X_dataset, y_dataset


def make_validation_set(filelist, cropsize, sr, hop_length, n_fft, offset):
    patch_list = []
    patch_dir = 'cs{}_sr{}_hl{}_nf{}_of{}'.format(cropsize, sr, hop_length, n_fft, offset)
    os.makedirs(patch_dir, exist_ok=True)

    for i, (X_path, y_path) in enumerate(tqdm(filelist)):
        basename = os.path.splitext(os.path.basename(X_path))[0]

        X, y = spec_utils.cache_or_load(X_path, y_path, sr, hop_length, n_fft)
        coef = np.max([np.abs(X).max(), np.abs(y).max()])
        X, y = X / coef, y / coef

        l, r, roi_size = make_padding(X.shape[2], cropsize, offset)
        X_pad = np.pad(X, ((0, 0), (0, 0), (l, r)), mode='constant')
        y_pad = np.pad(y, ((0, 0), (0, 0), (l, r)), mode='constant')

        len_dataset = int(np.ceil(X.shape[2] / roi_size))
        for j in range(len_dataset):
            outpath = os.path.join(patch_dir, '{}_p{}.npz'.format(basename, j))
            start = j * roi_size
            if not os.path.exists(outpath):
                np.savez(
                    outpath,
                    X=X_pad[:, :, start:start + cropsize],
                    y=y_pad[:, :, start:start + cropsize])
            patch_list.append(outpath)

    return VocalRemoverValidationSet(patch_list)

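Note (illustrative, not part of this commit's diff): the deleted dataset helpers above were typically wired together along the following lines when preparing vocal-remover training data. This is a minimal sketch; the directory layout and hyperparameter values are assumptions, not values taken from this repository.

    from uvr5_pack.lib_v5 import dataset

    # Pair mixture/instrumental audio files and split them into train/validation.
    train_filelist, val_filelist = dataset.train_val_split(
        dataset_dir='path/to/dataset', split_mode='random',
        val_rate=0.1, val_filelist=[])

    # Random spectrogram crops for training; cached .npz patches for validation.
    X_train, y_train = dataset.make_training_set(
        train_filelist, cropsize=256, patches=16,
        sr=44100, hop_length=512, n_fft=2048, offset=64)
    val_set = dataset.make_validation_set(
        val_filelist, cropsize=256, sr=44100,
        hop_length=512, n_fft=2048, offset=64)
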
uvr5_pack/lib_v5/layers.py DELETED
@@ -1,116 +0,0 @@
import torch
from torch import nn
import torch.nn.functional as F

from uvr5_pack.lib_v5 import spec_utils


class Conv2DBNActiv(nn.Module):

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(Conv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nout,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                bias=False),
            nn.BatchNorm2d(nout),
            activ()
        )

    def __call__(self, x):
        return self.conv(x)


class SeperableConv2DBNActiv(nn.Module):

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, dilation=1, activ=nn.ReLU):
        super(SeperableConv2DBNActiv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(
                nin, nin,
                kernel_size=ksize,
                stride=stride,
                padding=pad,
                dilation=dilation,
                groups=nin,
                bias=False),
            nn.Conv2d(
                nin, nout,
                kernel_size=1,
                bias=False),
            nn.BatchNorm2d(nout),
            activ()
        )

    def __call__(self, x):
        return self.conv(x)


class Encoder(nn.Module):

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.LeakyReLU):
        super(Encoder, self).__init__()
        self.conv1 = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.conv2 = Conv2DBNActiv(nout, nout, ksize, stride, pad, activ=activ)

    def __call__(self, x):
        skip = self.conv1(x)
        h = self.conv2(skip)

        return h, skip


class Decoder(nn.Module):

    def __init__(self, nin, nout, ksize=3, stride=1, pad=1, activ=nn.ReLU, dropout=False):
        super(Decoder, self).__init__()
        self.conv = Conv2DBNActiv(nin, nout, ksize, 1, pad, activ=activ)
        self.dropout = nn.Dropout2d(0.1) if dropout else None

    def __call__(self, x, skip=None):
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        if skip is not None:
            skip = spec_utils.crop_center(skip, x)
            x = torch.cat([x, skip], dim=1)
        h = self.conv(x)

        if self.dropout is not None:
            h = self.dropout(h)

        return h


class ASPPModule(nn.Module):

    def __init__(self, nin, nout, dilations=(4, 8, 16), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 5, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5), dim=1)
        bottle = self.bottleneck(out)
        return bottle

uvr5_pack/lib_v5/layers_123812KB .py DELETED
@@ -1,116 +0,0 @@
(Deleted content is identical, line for line, to uvr5_pack/lib_v5/layers.py above.)

uvr5_pack/lib_v5/layers_123821KB.py DELETED
@@ -1,116 +0,0 @@
(Deleted content is identical, line for line, to uvr5_pack/lib_v5/layers.py above.)

uvr5_pack/lib_v5/layers_33966KB.py DELETED
@@ -1,122 +0,0 @@
(Identical to uvr5_pack/lib_v5/layers.py above except for ASPPModule, which concatenates seven parallel branches instead of five; only the differing class is shown:)

class ASPPModule(nn.Module):

    def __init__(self, nin, nout, dilations=(4, 8, 16, 32, 64), activ=nn.ReLU):
        super(ASPPModule, self).__init__()
        self.conv1 = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, None)),
            Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        )
        self.conv2 = Conv2DBNActiv(nin, nin, 1, 1, 0, activ=activ)
        self.conv3 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[0], dilations[0], activ=activ)
        self.conv4 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[1], dilations[1], activ=activ)
        self.conv5 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv6 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.conv7 = SeperableConv2DBNActiv(
            nin, nin, 3, 1, dilations[2], dilations[2], activ=activ)
        self.bottleneck = nn.Sequential(
            Conv2DBNActiv(nin * 7, nout, 1, 1, 0, activ=activ),
            nn.Dropout2d(0.1)
        )

    def forward(self, x):
        _, _, h, w = x.size()
        feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
        feat2 = self.conv2(x)
        feat3 = self.conv3(x)
        feat4 = self.conv4(x)
        feat5 = self.conv5(x)
        feat6 = self.conv6(x)
        feat7 = self.conv7(x)
        out = torch.cat((feat1, feat2, feat3, feat4, feat5, feat6, feat7), dim=1)
        bottle = self.bottleneck(out)
        return bottle

uvr5_pack/lib_v5/layers_537227KB.py DELETED
@@ -1,122 +0,0 @@
(Deleted content is identical, line for line, to uvr5_pack/lib_v5/layers_33966KB.py above.)

uvr5_pack/lib_v5/layers_537238KB.py DELETED
@@ -1,122 +0,0 @@
(Deleted content is identical, line for line, to uvr5_pack/lib_v5/layers_33966KB.py above.)

uvr5_pack/lib_v5/model_param_init.py DELETED
@@ -1,60 +0,0 @@
import json
import os
import pathlib

default_param = {}
default_param['bins'] = 768
default_param['unstable_bins'] = 9  # training only
default_param['reduction_bins'] = 762  # training only
default_param['sr'] = 44100
default_param['pre_filter_start'] = 757
default_param['pre_filter_stop'] = 768
default_param['band'] = {}


default_param['band'][1] = {
    'sr': 11025,
    'hl': 128,
    'n_fft': 960,
    'crop_start': 0,
    'crop_stop': 245,
    'lpf_start': 61,  # inference only
    'res_type': 'polyphase'
}

default_param['band'][2] = {
    'sr': 44100,
    'hl': 512,
    'n_fft': 1536,
    'crop_start': 24,
    'crop_stop': 547,
    'hpf_start': 81,  # inference only
    'res_type': 'sinc_best'
}


def int_keys(d):
    r = {}
    for k, v in d:
        if k.isdigit():
            k = int(k)
        r[k] = v
    return r


class ModelParameters(object):
    def __init__(self, config_path=''):
        if '.pth' == pathlib.Path(config_path).suffix:
            import zipfile

            with zipfile.ZipFile(config_path, 'r') as zip:
                self.param = json.loads(zip.read('param.json'), object_pairs_hook=int_keys)
        elif '.json' == pathlib.Path(config_path).suffix:
            with open(config_path, 'r') as f:
                self.param = json.loads(f.read(), object_pairs_hook=int_keys)
        else:
            self.param = default_param

        for k in ['mid_side', 'mid_side_b', 'mid_side_b2', 'stereo_w', 'stereo_n', 'reverse']:
            if not k in self.param:
                self.param[k] = False

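Note (illustrative, not part of this commit's diff): a minimal sketch of how the deleted ModelParameters class was consumed. The config path points at one of the JSON files removed below; the loop itself is only an assumption about typical usage.

    from uvr5_pack.lib_v5.model_param_init import ModelParameters

    mp = ModelParameters('uvr5_pack/lib_v5/modelparams/4band_v2.json')
    for band, bp in mp.param['band'].items():
        # Each band defines its own resampling rate, hop length and FFT size.
        print(band, bp['sr'], bp['hl'], bp['n_fft'])
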
uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 16000, "hl": 512, "n_fft": 2048, "crop_start": 0, "crop_stop": 1024, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 16000, "pre_filter_start": 1023, "pre_filter_stop": 1024}

uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 32000, "hl": 512, "n_fft": 2048, "crop_start": 0, "crop_stop": 1024, "hpf_start": -1, "res_type": "kaiser_fast"}},
 "sr": 32000, "pre_filter_start": 1000, "pre_filter_stop": 1021}

uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 33075, "hl": 384, "n_fft": 2048, "crop_start": 0, "crop_stop": 1024, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 33075, "pre_filter_start": 1000, "pre_filter_stop": 1021}

uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 44100, "hl": 1024, "n_fft": 2048, "crop_start": 0, "crop_stop": 1024, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 44100, "pre_filter_start": 1023, "pre_filter_stop": 1024}

uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json DELETED
@@ -1,19 +0,0 @@
{"bins": 256, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 44100, "hl": 256, "n_fft": 512, "crop_start": 0, "crop_stop": 256, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 44100, "pre_filter_start": 256, "pre_filter_stop": 256}

uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 44100, "hl": 512, "n_fft": 2048, "crop_start": 0, "crop_stop": 1024, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 44100, "pre_filter_start": 1023, "pre_filter_stop": 1024}

uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512_cut.json DELETED
@@ -1,19 +0,0 @@
{"bins": 1024, "unstable_bins": 0, "reduction_bins": 0,
 "band": {"1": {"sr": 44100, "hl": 512, "n_fft": 2048, "crop_start": 0, "crop_stop": 700, "hpf_start": -1, "res_type": "sinc_best"}},
 "sr": 44100, "pre_filter_start": 1023, "pre_filter_stop": 700}

uvr5_pack/lib_v5/modelparams/2band_32000.json DELETED
@@ -1,30 +0,0 @@
{"bins": 768, "unstable_bins": 7, "reduction_bins": 705,
 "band": {
   "1": {"sr": 6000, "hl": 66, "n_fft": 512, "crop_start": 0, "crop_stop": 240, "lpf_start": 60, "lpf_stop": 118, "res_type": "sinc_fastest"},
   "2": {"sr": 32000, "hl": 352, "n_fft": 1024, "crop_start": 22, "crop_stop": 505, "hpf_start": 44, "hpf_stop": 23, "res_type": "sinc_medium"}},
 "sr": 32000, "pre_filter_start": 710, "pre_filter_stop": 731}

uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json DELETED
@@ -1,30 +0,0 @@
{"bins": 512, "unstable_bins": 7, "reduction_bins": 510,
 "band": {
   "1": {"sr": 11025, "hl": 160, "n_fft": 768, "crop_start": 0, "crop_stop": 192, "lpf_start": 41, "lpf_stop": 139, "res_type": "sinc_fastest"},
   "2": {"sr": 44100, "hl": 640, "n_fft": 1024, "crop_start": 10, "crop_stop": 320, "hpf_start": 47, "hpf_stop": 15, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 510, "pre_filter_stop": 512}

uvr5_pack/lib_v5/modelparams/2band_48000.json DELETED
@@ -1,30 +0,0 @@
{"bins": 768, "unstable_bins": 7, "reduction_bins": 705,
 "band": {
   "1": {"sr": 6000, "hl": 66, "n_fft": 512, "crop_start": 0, "crop_stop": 240, "lpf_start": 60, "lpf_stop": 240, "res_type": "sinc_fastest"},
   "2": {"sr": 48000, "hl": 528, "n_fft": 1536, "crop_start": 22, "crop_stop": 505, "hpf_start": 82, "hpf_stop": 22, "res_type": "sinc_medium"}},
 "sr": 48000, "pre_filter_start": 710, "pre_filter_stop": 731}

uvr5_pack/lib_v5/modelparams/3band_44100.json DELETED
@@ -1,42 +0,0 @@
{"bins": 768, "unstable_bins": 5, "reduction_bins": 733,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 768, "crop_start": 0, "crop_stop": 278, "lpf_start": 28, "lpf_stop": 140, "res_type": "polyphase"},
   "2": {"sr": 22050, "hl": 256, "n_fft": 768, "crop_start": 14, "crop_stop": 322, "hpf_start": 70, "hpf_stop": 14, "lpf_start": 283, "lpf_stop": 314, "res_type": "polyphase"},
   "3": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 131, "crop_stop": 313, "hpf_start": 154, "hpf_stop": 141, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 757, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/3band_44100_mid.json DELETED
@@ -1,43 +0,0 @@
{"mid_side": true,
 "bins": 768, "unstable_bins": 5, "reduction_bins": 733,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 768, "crop_start": 0, "crop_stop": 278, "lpf_start": 28, "lpf_stop": 140, "res_type": "polyphase"},
   "2": {"sr": 22050, "hl": 256, "n_fft": 768, "crop_start": 14, "crop_stop": 322, "hpf_start": 70, "hpf_stop": 14, "lpf_start": 283, "lpf_stop": 314, "res_type": "polyphase"},
   "3": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 131, "crop_stop": 313, "hpf_start": 154, "hpf_stop": 141, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 757, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json DELETED
@@ -1,43 +0,0 @@
{"mid_side_b2": true,
 "bins": 640, "unstable_bins": 7, "reduction_bins": 565,
 "band": {
   "1": {"sr": 11025, "hl": 108, "n_fft": 1024, "crop_start": 0, "crop_stop": 187, "lpf_start": 92, "lpf_stop": 186, "res_type": "polyphase"},
   "2": {"sr": 22050, "hl": 216, "n_fft": 768, "crop_start": 0, "crop_stop": 212, "hpf_start": 68, "hpf_stop": 34, "lpf_start": 174, "lpf_stop": 209, "res_type": "polyphase"},
   "3": {"sr": 44100, "hl": 432, "n_fft": 640, "crop_start": 66, "crop_stop": 307, "hpf_start": 86, "hpf_stop": 72, "res_type": "kaiser_fast"}},
 "sr": 44100, "pre_filter_start": 639, "pre_filter_stop": 640}

uvr5_pack/lib_v5/modelparams/4band_44100.json DELETED
@@ -1,54 +0,0 @@
{"bins": 768, "unstable_bins": 7, "reduction_bins": 668,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 1024, "crop_start": 0, "crop_stop": 186, "lpf_start": 37, "lpf_stop": 73, "res_type": "polyphase"},
   "2": {"sr": 11025, "hl": 128, "n_fft": 512, "crop_start": 4, "crop_stop": 185, "hpf_start": 36, "hpf_stop": 18, "lpf_start": 93, "lpf_stop": 185, "res_type": "polyphase"},
   "3": {"sr": 22050, "hl": 256, "n_fft": 512, "crop_start": 46, "crop_stop": 186, "hpf_start": 93, "hpf_stop": 46, "lpf_start": 164, "lpf_stop": 186, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 121, "crop_stop": 382, "hpf_start": 138, "hpf_stop": 123, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 740, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/4band_44100_mid.json DELETED
@@ -1,55 +0,0 @@
{"bins": 768, "unstable_bins": 7, "mid_side": true, "reduction_bins": 668,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 1024, "crop_start": 0, "crop_stop": 186, "lpf_start": 37, "lpf_stop": 73, "res_type": "polyphase"},
   "2": {"sr": 11025, "hl": 128, "n_fft": 512, "crop_start": 4, "crop_stop": 185, "hpf_start": 36, "hpf_stop": 18, "lpf_start": 93, "lpf_stop": 185, "res_type": "polyphase"},
   "3": {"sr": 22050, "hl": 256, "n_fft": 512, "crop_start": 46, "crop_stop": 186, "hpf_start": 93, "hpf_stop": 46, "lpf_start": 164, "lpf_stop": 186, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 121, "crop_stop": 382, "hpf_start": 138, "hpf_stop": 123, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 740, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/4band_44100_msb.json DELETED
@@ -1,55 +0,0 @@
{"mid_side_b": true, "bins": 768, "unstable_bins": 7, "reduction_bins": 668,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 1024, "crop_start": 0, "crop_stop": 186, "lpf_start": 37, "lpf_stop": 73, "res_type": "polyphase"},
   "2": {"sr": 11025, "hl": 128, "n_fft": 512, "crop_start": 4, "crop_stop": 185, "hpf_start": 36, "hpf_stop": 18, "lpf_start": 93, "lpf_stop": 185, "res_type": "polyphase"},
   "3": {"sr": 22050, "hl": 256, "n_fft": 512, "crop_start": 46, "crop_stop": 186, "hpf_start": 93, "hpf_stop": 46, "lpf_start": 164, "lpf_stop": 186, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 121, "crop_stop": 382, "hpf_start": 138, "hpf_stop": 123, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 740, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json DELETED
@@ -1,55 +0,0 @@
(Deleted content is identical, line for line, to uvr5_pack/lib_v5/modelparams/4band_44100_msb.json above.)

uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json DELETED
@@ -1,55 +0,0 @@
{"reverse": true, "bins": 768, "unstable_bins": 7, "reduction_bins": 668,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 1024, "crop_start": 0, "crop_stop": 186, "lpf_start": 37, "lpf_stop": 73, "res_type": "polyphase"},
   "2": {"sr": 11025, "hl": 128, "n_fft": 512, "crop_start": 4, "crop_stop": 185, "hpf_start": 36, "hpf_stop": 18, "lpf_start": 93, "lpf_stop": 185, "res_type": "polyphase"},
   "3": {"sr": 22050, "hl": 256, "n_fft": 512, "crop_start": 46, "crop_stop": 186, "hpf_start": 93, "hpf_stop": 46, "lpf_start": 164, "lpf_stop": 186, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 121, "crop_stop": 382, "hpf_start": 138, "hpf_stop": 123, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 740, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/4band_44100_sw.json DELETED
@@ -1,55 +0,0 @@
{"stereo_w": true, "bins": 768, "unstable_bins": 7, "reduction_bins": 668,
 "band": {
   "1": {"sr": 11025, "hl": 128, "n_fft": 1024, "crop_start": 0, "crop_stop": 186, "lpf_start": 37, "lpf_stop": 73, "res_type": "polyphase"},
   "2": {"sr": 11025, "hl": 128, "n_fft": 512, "crop_start": 4, "crop_stop": 185, "hpf_start": 36, "hpf_stop": 18, "lpf_start": 93, "lpf_stop": 185, "res_type": "polyphase"},
   "3": {"sr": 22050, "hl": 256, "n_fft": 512, "crop_start": 46, "crop_stop": 186, "hpf_start": 93, "hpf_stop": 46, "lpf_start": 164, "lpf_stop": 186, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 512, "n_fft": 768, "crop_start": 121, "crop_stop": 382, "hpf_start": 138, "hpf_stop": 123, "res_type": "sinc_medium"}},
 "sr": 44100, "pre_filter_start": 740, "pre_filter_stop": 768}

uvr5_pack/lib_v5/modelparams/4band_v2.json DELETED
@@ -1,54 +0,0 @@
{"bins": 672, "unstable_bins": 8, "reduction_bins": 637,
 "band": {
   "1": {"sr": 7350, "hl": 80, "n_fft": 640, "crop_start": 0, "crop_stop": 85, "lpf_start": 25, "lpf_stop": 53, "res_type": "polyphase"},
   "2": {"sr": 7350, "hl": 80, "n_fft": 320, "crop_start": 4, "crop_stop": 87, "hpf_start": 25, "hpf_stop": 12, "lpf_start": 31, "lpf_stop": 62, "res_type": "polyphase"},
   "3": {"sr": 14700, "hl": 160, "n_fft": 512, "crop_start": 17, "crop_stop": 216, "hpf_start": 48, "hpf_stop": 24, "lpf_start": 139, "lpf_stop": 210, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 480, "n_fft": 960, "crop_start": 78, "crop_stop": 383, "hpf_start": 130, "hpf_stop": 86, "res_type": "kaiser_fast"}},
 "sr": 44100, "pre_filter_start": 668, "pre_filter_stop": 672}

uvr5_pack/lib_v5/modelparams/4band_v2_sn.json DELETED
@@ -1,55 +0,0 @@
{"bins": 672, "unstable_bins": 8, "reduction_bins": 637,
 "band": {
   "1": {"sr": 7350, "hl": 80, "n_fft": 640, "crop_start": 0, "crop_stop": 85, "lpf_start": 25, "lpf_stop": 53, "res_type": "polyphase"},
   "2": {"sr": 7350, "hl": 80, "n_fft": 320, "crop_start": 4, "crop_stop": 87, "hpf_start": 25, "hpf_stop": 12, "lpf_start": 31, "lpf_stop": 62, "res_type": "polyphase"},
   "3": {"sr": 14700, "hl": 160, "n_fft": 512, "crop_start": 17, "crop_stop": 216, "hpf_start": 48, "hpf_stop": 24, "lpf_start": 139, "lpf_stop": 210, "res_type": "polyphase"},
   "4": {"sr": 44100, "hl": 480, "n_fft": 960, "crop_start": 78, "crop_stop": 383, "hpf_start": 130, "hpf_stop": 86, "convert_channels": "stereo_n", "res_type": "kaiser_fast"}},
 "sr": 44100, "pre_filter_start": 668, "pre_filter_stop": 672}

uvr5_pack/lib_v5/modelparams/ensemble.json DELETED
@@ -1,43 +0,0 @@
{"mid_side_b2": true, "bins": 1280, "unstable_bins": 7, "reduction_bins": 565,
 "band": {
   "1": {"sr": 11025, "hl": 108, "n_fft": 2048, "crop_start": 0, "crop_stop": 374, "lpf_start": 92, "lpf_stop": 186, "res_type": "polyphase"},
   "2": {"sr": 22050, "hl": 216, "n_fft": 1536, "crop_start": 0, "crop_stop": 424, "hpf_start": 68, "hpf_stop": 34, "lpf_start": 348, "lpf_stop": 418, "res_type": "polyphase"},
   "3": {"sr": 44100, "hl": 432, "n_fft": 1280, "crop_start": 132, "crop_stop": 614, "hpf_start": 172, "hpf_stop": 144, "res_type": "polyphase"}},
 "sr": 44100, "pre_filter_start": 1280, "pre_filter_stop": 1280}

uvr5_pack/lib_v5/nets.py DELETED
@@ -1,113 +0,0 @@
import torch
from torch import nn
import torch.nn.functional as F

from uvr5_pack.lib_v5 import layers
from uvr5_pack.lib_v5 import spec_utils


class BaseASPPNet(nn.Module):

    def __init__(self, nin, ch, dilations=(4, 8, 16)):
        super(BaseASPPNet, self).__init__()
        self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
        self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
        self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
        self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)

        self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)

        self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
        self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
        self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
        self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)

    def __call__(self, x):
        h, e1 = self.enc1(x)
        h, e2 = self.enc2(h)
        h, e3 = self.enc3(h)
        h, e4 = self.enc4(h)

        h = self.aspp(h)

        h = self.dec4(h, e4)
        h = self.dec3(h, e3)
        h = self.dec2(h, e2)
        h = self.dec1(h, e1)

        return h


class CascadedASPPNet(nn.Module):

    def __init__(self, n_fft):
        super(CascadedASPPNet, self).__init__()
        self.stg1_low_band_net = BaseASPPNet(2, 16)
        self.stg1_high_band_net = BaseASPPNet(2, 16)

        self.stg2_bridge = layers.Conv2DBNActiv(18, 8, 1, 1, 0)
        self.stg2_full_band_net = BaseASPPNet(8, 16)

        self.stg3_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
        self.stg3_full_band_net = BaseASPPNet(16, 32)

        self.out = nn.Conv2d(32, 2, 1, bias=False)
        self.aux1_out = nn.Conv2d(16, 2, 1, bias=False)
        self.aux2_out = nn.Conv2d(16, 2, 1, bias=False)

        self.max_bin = n_fft // 2
        self.output_bin = n_fft // 2 + 1

        self.offset = 128

    def forward(self, x, aggressiveness=None):
        mix = x.detach()
        x = x.clone()

        x = x[:, :, :self.max_bin]

        bandw = x.size()[2] // 2
        aux1 = torch.cat([
            self.stg1_low_band_net(x[:, :, :bandw]),
            self.stg1_high_band_net(x[:, :, bandw:])
        ], dim=2)

        h = torch.cat([x, aux1], dim=1)
        aux2 = self.stg2_full_band_net(self.stg2_bridge(h))

        h = torch.cat([x, aux1, aux2], dim=1)
        h = self.stg3_full_band_net(self.stg3_bridge(h))

        mask = torch.sigmoid(self.out(h))
        mask = F.pad(
            input=mask,
            pad=(0, 0, 0, self.output_bin - mask.size()[2]),
            mode='replicate')

        if self.training:
            aux1 = torch.sigmoid(self.aux1_out(aux1))
            aux1 = F.pad(
                input=aux1,
                pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
                mode='replicate')
            aux2 = torch.sigmoid(self.aux2_out(aux2))
            aux2 = F.pad(
                input=aux2,
                pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
                mode='replicate')
            return mask * mix, aux1 * mix, aux2 * mix
        else:
            if aggressiveness:
                mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
                mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])

            return mask * mix

    def predict(self, x_mag, aggressiveness=None):
        h = self.forward(x_mag, aggressiveness)

        if self.offset > 0:
            h = h[:, :, :, self.offset:-self.offset]
            assert h.size()[3] > 0

        return h

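Note (illustrative, not part of this commit's diff): the CascadedASPPNet above predicts a sigmoid mask over a magnitude spectrogram and returns it multiplied by the input. A hypothetical inference call, with an assumed input shape and aggressiveness setting, looks like this:

    import torch
    from uvr5_pack.lib_v5 import nets

    model = nets.CascadedASPPNet(n_fft=2048)
    model.eval()  # predict() relies on the single-output (non-training) path
    x_mag = torch.rand(1, 2, 1025, 512)  # (batch, channels, n_fft // 2 + 1, frames)
    with torch.no_grad():
        masked_mix = model.predict(
            x_mag, aggressiveness={'value': 0.1, 'split_bin': 85})
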
uvr5_pack/lib_v5/nets_123812KB.py DELETED
@@ -1,112 +0,0 @@
1
- import torch
2
- from torch import nn
3
- import torch.nn.functional as F
4
-
5
- from uvr5_pack.lib_v5 import layers_123821KB as layers
6
-
7
-
8
- class BaseASPPNet(nn.Module):
9
-
10
- def __init__(self, nin, ch, dilations=(4, 8, 16)):
11
- super(BaseASPPNet, self).__init__()
12
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
13
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
14
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
15
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
16
-
17
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
18
-
19
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
20
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
21
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
22
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
23
-
24
- def __call__(self, x):
25
- h, e1 = self.enc1(x)
26
- h, e2 = self.enc2(h)
27
- h, e3 = self.enc3(h)
28
- h, e4 = self.enc4(h)
29
-
30
- h = self.aspp(h)
31
-
32
- h = self.dec4(h, e4)
33
- h = self.dec3(h, e3)
34
- h = self.dec2(h, e2)
35
- h = self.dec1(h, e1)
36
-
37
- return h
38
-
39
-
40
- class CascadedASPPNet(nn.Module):
41
-
42
- def __init__(self, n_fft):
43
- super(CascadedASPPNet, self).__init__()
44
- self.stg1_low_band_net = BaseASPPNet(2, 32)
45
- self.stg1_high_band_net = BaseASPPNet(2, 32)
46
-
47
- self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
48
- self.stg2_full_band_net = BaseASPPNet(16, 32)
49
-
50
- self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
51
- self.stg3_full_band_net = BaseASPPNet(32, 64)
52
-
53
- self.out = nn.Conv2d(64, 2, 1, bias=False)
54
- self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
55
- self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)
56
-
57
- self.max_bin = n_fft // 2
58
- self.output_bin = n_fft // 2 + 1
59
-
60
- self.offset = 128
61
-
62
- def forward(self, x, aggressiveness=None):
63
- mix = x.detach()
64
- x = x.clone()
65
-
66
- x = x[:, :, :self.max_bin]
67
-
68
- bandw = x.size()[2] // 2
69
- aux1 = torch.cat([
70
- self.stg1_low_band_net(x[:, :, :bandw]),
71
- self.stg1_high_band_net(x[:, :, bandw:])
72
- ], dim=2)
73
-
74
- h = torch.cat([x, aux1], dim=1)
75
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
76
-
77
- h = torch.cat([x, aux1, aux2], dim=1)
78
- h = self.stg3_full_band_net(self.stg3_bridge(h))
79
-
80
- mask = torch.sigmoid(self.out(h))
81
- mask = F.pad(
82
- input=mask,
83
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
84
- mode='replicate')
85
-
86
- if self.training:
87
- aux1 = torch.sigmoid(self.aux1_out(aux1))
88
- aux1 = F.pad(
89
- input=aux1,
90
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
91
- mode='replicate')
92
- aux2 = torch.sigmoid(self.aux2_out(aux2))
93
- aux2 = F.pad(
94
- input=aux2,
95
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
96
- mode='replicate')
97
- return mask * mix, aux1 * mix, aux2 * mix
98
- else:
99
- if aggressiveness:
100
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
101
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
102
-
103
- return mask * mix
104
-
105
- def predict(self, x_mag, aggressiveness=None):
106
- h = self.forward(x_mag, aggressiveness)
107
-
108
- if self.offset > 0:
109
- h = h[:, :, :, self.offset:-self.offset]
110
- assert h.size()[3] > 0
111
-
112
- return h
 
uvr5_pack/lib_v5/nets_123821KB.py DELETED
@@ -1,112 +0,0 @@
1
- import torch
2
- from torch import nn
3
- import torch.nn.functional as F
4
-
5
- from uvr5_pack.lib_v5 import layers_123821KB as layers
6
-
7
-
8
- class BaseASPPNet(nn.Module):
9
-
10
- def __init__(self, nin, ch, dilations=(4, 8, 16)):
11
- super(BaseASPPNet, self).__init__()
12
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
13
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
14
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
15
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
16
-
17
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
18
-
19
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
20
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
21
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
22
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
23
-
24
- def __call__(self, x):
25
- h, e1 = self.enc1(x)
26
- h, e2 = self.enc2(h)
27
- h, e3 = self.enc3(h)
28
- h, e4 = self.enc4(h)
29
-
30
- h = self.aspp(h)
31
-
32
- h = self.dec4(h, e4)
33
- h = self.dec3(h, e3)
34
- h = self.dec2(h, e2)
35
- h = self.dec1(h, e1)
36
-
37
- return h
38
-
39
-
40
- class CascadedASPPNet(nn.Module):
41
-
42
- def __init__(self, n_fft):
43
- super(CascadedASPPNet, self).__init__()
44
- self.stg1_low_band_net = BaseASPPNet(2, 32)
45
- self.stg1_high_band_net = BaseASPPNet(2, 32)
46
-
47
- self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
48
- self.stg2_full_band_net = BaseASPPNet(16, 32)
49
-
50
- self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
51
- self.stg3_full_band_net = BaseASPPNet(32, 64)
52
-
53
- self.out = nn.Conv2d(64, 2, 1, bias=False)
54
- self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
55
- self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)
56
-
57
- self.max_bin = n_fft // 2
58
- self.output_bin = n_fft // 2 + 1
59
-
60
- self.offset = 128
61
-
62
- def forward(self, x, aggressiveness=None):
63
- mix = x.detach()
64
- x = x.clone()
65
-
66
- x = x[:, :, :self.max_bin]
67
-
68
- bandw = x.size()[2] // 2
69
- aux1 = torch.cat([
70
- self.stg1_low_band_net(x[:, :, :bandw]),
71
- self.stg1_high_band_net(x[:, :, bandw:])
72
- ], dim=2)
73
-
74
- h = torch.cat([x, aux1], dim=1)
75
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
76
-
77
- h = torch.cat([x, aux1, aux2], dim=1)
78
- h = self.stg3_full_band_net(self.stg3_bridge(h))
79
-
80
- mask = torch.sigmoid(self.out(h))
81
- mask = F.pad(
82
- input=mask,
83
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
84
- mode='replicate')
85
-
86
- if self.training:
87
- aux1 = torch.sigmoid(self.aux1_out(aux1))
88
- aux1 = F.pad(
89
- input=aux1,
90
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
91
- mode='replicate')
92
- aux2 = torch.sigmoid(self.aux2_out(aux2))
93
- aux2 = F.pad(
94
- input=aux2,
95
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
96
- mode='replicate')
97
- return mask * mix, aux1 * mix, aux2 * mix
98
- else:
99
- if aggressiveness:
100
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
101
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
102
-
103
- return mask * mix
104
-
105
- def predict(self, x_mag, aggressiveness=None):
106
- h = self.forward(x_mag, aggressiveness)
107
-
108
- if self.offset > 0:
109
- h = h[:, :, :, self.offset:-self.offset]
110
- assert h.size()[3] > 0
111
-
112
- return h
 
uvr5_pack/lib_v5/nets_33966KB.py DELETED
@@ -1,112 +0,0 @@
1
- import torch
2
- from torch import nn
3
- import torch.nn.functional as F
4
-
5
- from uvr5_pack.lib_v5 import layers_33966KB as layers
6
-
7
-
8
- class BaseASPPNet(nn.Module):
9
-
10
- def __init__(self, nin, ch, dilations=(4, 8, 16, 32)):
11
- super(BaseASPPNet, self).__init__()
12
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
13
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
14
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
15
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
16
-
17
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
18
-
19
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
20
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
21
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
22
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
23
-
24
- def __call__(self, x):
25
- h, e1 = self.enc1(x)
26
- h, e2 = self.enc2(h)
27
- h, e3 = self.enc3(h)
28
- h, e4 = self.enc4(h)
29
-
30
- h = self.aspp(h)
31
-
32
- h = self.dec4(h, e4)
33
- h = self.dec3(h, e3)
34
- h = self.dec2(h, e2)
35
- h = self.dec1(h, e1)
36
-
37
- return h
38
-
39
-
40
- class CascadedASPPNet(nn.Module):
41
-
42
- def __init__(self, n_fft):
43
- super(CascadedASPPNet, self).__init__()
44
- self.stg1_low_band_net = BaseASPPNet(2, 16)
45
- self.stg1_high_band_net = BaseASPPNet(2, 16)
46
-
47
- self.stg2_bridge = layers.Conv2DBNActiv(18, 8, 1, 1, 0)
48
- self.stg2_full_band_net = BaseASPPNet(8, 16)
49
-
50
- self.stg3_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
51
- self.stg3_full_band_net = BaseASPPNet(16, 32)
52
-
53
- self.out = nn.Conv2d(32, 2, 1, bias=False)
54
- self.aux1_out = nn.Conv2d(16, 2, 1, bias=False)
55
- self.aux2_out = nn.Conv2d(16, 2, 1, bias=False)
56
-
57
- self.max_bin = n_fft // 2
58
- self.output_bin = n_fft // 2 + 1
59
-
60
- self.offset = 128
61
-
62
- def forward(self, x, aggressiveness=None):
63
- mix = x.detach()
64
- x = x.clone()
65
-
66
- x = x[:, :, :self.max_bin]
67
-
68
- bandw = x.size()[2] // 2
69
- aux1 = torch.cat([
70
- self.stg1_low_band_net(x[:, :, :bandw]),
71
- self.stg1_high_band_net(x[:, :, bandw:])
72
- ], dim=2)
73
-
74
- h = torch.cat([x, aux1], dim=1)
75
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
76
-
77
- h = torch.cat([x, aux1, aux2], dim=1)
78
- h = self.stg3_full_band_net(self.stg3_bridge(h))
79
-
80
- mask = torch.sigmoid(self.out(h))
81
- mask = F.pad(
82
- input=mask,
83
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
84
- mode='replicate')
85
-
86
- if self.training:
87
- aux1 = torch.sigmoid(self.aux1_out(aux1))
88
- aux1 = F.pad(
89
- input=aux1,
90
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
91
- mode='replicate')
92
- aux2 = torch.sigmoid(self.aux2_out(aux2))
93
- aux2 = F.pad(
94
- input=aux2,
95
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
96
- mode='replicate')
97
- return mask * mix, aux1 * mix, aux2 * mix
98
- else:
99
- if aggressiveness:
100
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
101
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
102
-
103
- return mask * mix
104
-
105
- def predict(self, x_mag, aggressiveness=None):
106
- h = self.forward(x_mag, aggressiveness)
107
-
108
- if self.offset > 0:
109
- h = h[:, :, :, self.offset:-self.offset]
110
- assert h.size()[3] > 0
111
-
112
- return h
 
uvr5_pack/lib_v5/nets_537227KB.py DELETED
@@ -1,113 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from torch import nn
4
- import torch.nn.functional as F
5
-
6
- from uvr5_pack.lib_v5 import layers_537238KB as layers
7
-
8
-
9
- class BaseASPPNet(nn.Module):
10
-
11
- def __init__(self, nin, ch, dilations=(4, 8, 16)):
12
- super(BaseASPPNet, self).__init__()
13
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
14
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
15
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
16
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
17
-
18
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
19
-
20
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
21
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
22
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
23
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
24
-
25
- def __call__(self, x):
26
- h, e1 = self.enc1(x)
27
- h, e2 = self.enc2(h)
28
- h, e3 = self.enc3(h)
29
- h, e4 = self.enc4(h)
30
-
31
- h = self.aspp(h)
32
-
33
- h = self.dec4(h, e4)
34
- h = self.dec3(h, e3)
35
- h = self.dec2(h, e2)
36
- h = self.dec1(h, e1)
37
-
38
- return h
39
-
40
-
41
- class CascadedASPPNet(nn.Module):
42
-
43
- def __init__(self, n_fft):
44
- super(CascadedASPPNet, self).__init__()
45
- self.stg1_low_band_net = BaseASPPNet(2, 64)
46
- self.stg1_high_band_net = BaseASPPNet(2, 64)
47
-
48
- self.stg2_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
49
- self.stg2_full_band_net = BaseASPPNet(32, 64)
50
-
51
- self.stg3_bridge = layers.Conv2DBNActiv(130, 64, 1, 1, 0)
52
- self.stg3_full_band_net = BaseASPPNet(64, 128)
53
-
54
- self.out = nn.Conv2d(128, 2, 1, bias=False)
55
- self.aux1_out = nn.Conv2d(64, 2, 1, bias=False)
56
- self.aux2_out = nn.Conv2d(64, 2, 1, bias=False)
57
-
58
- self.max_bin = n_fft // 2
59
- self.output_bin = n_fft // 2 + 1
60
-
61
- self.offset = 128
62
-
63
- def forward(self, x, aggressiveness=None):
64
- mix = x.detach()
65
- x = x.clone()
66
-
67
- x = x[:, :, :self.max_bin]
68
-
69
- bandw = x.size()[2] // 2
70
- aux1 = torch.cat([
71
- self.stg1_low_band_net(x[:, :, :bandw]),
72
- self.stg1_high_band_net(x[:, :, bandw:])
73
- ], dim=2)
74
-
75
- h = torch.cat([x, aux1], dim=1)
76
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
77
-
78
- h = torch.cat([x, aux1, aux2], dim=1)
79
- h = self.stg3_full_band_net(self.stg3_bridge(h))
80
-
81
- mask = torch.sigmoid(self.out(h))
82
- mask = F.pad(
83
- input=mask,
84
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
85
- mode='replicate')
86
-
87
- if self.training:
88
- aux1 = torch.sigmoid(self.aux1_out(aux1))
89
- aux1 = F.pad(
90
- input=aux1,
91
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
92
- mode='replicate')
93
- aux2 = torch.sigmoid(self.aux2_out(aux2))
94
- aux2 = F.pad(
95
- input=aux2,
96
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
97
- mode='replicate')
98
- return mask * mix, aux1 * mix, aux2 * mix
99
- else:
100
- if aggressiveness:
101
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
102
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
103
-
104
- return mask * mix
105
-
106
- def predict(self, x_mag, aggressiveness=None):
107
- h = self.forward(x_mag, aggressiveness)
108
-
109
- if self.offset > 0:
110
- h = h[:, :, :, self.offset:-self.offset]
111
- assert h.size()[3] > 0
112
-
113
- return h
 
uvr5_pack/lib_v5/nets_537238KB.py DELETED
@@ -1,113 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from torch import nn
4
- import torch.nn.functional as F
5
-
6
- from uvr5_pack.lib_v5 import layers_537238KB as layers
7
-
8
-
9
- class BaseASPPNet(nn.Module):
10
-
11
- def __init__(self, nin, ch, dilations=(4, 8, 16)):
12
- super(BaseASPPNet, self).__init__()
13
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
14
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
15
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
16
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
17
-
18
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
19
-
20
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
21
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
22
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
23
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
24
-
25
- def __call__(self, x):
26
- h, e1 = self.enc1(x)
27
- h, e2 = self.enc2(h)
28
- h, e3 = self.enc3(h)
29
- h, e4 = self.enc4(h)
30
-
31
- h = self.aspp(h)
32
-
33
- h = self.dec4(h, e4)
34
- h = self.dec3(h, e3)
35
- h = self.dec2(h, e2)
36
- h = self.dec1(h, e1)
37
-
38
- return h
39
-
40
-
41
- class CascadedASPPNet(nn.Module):
42
-
43
- def __init__(self, n_fft):
44
- super(CascadedASPPNet, self).__init__()
45
- self.stg1_low_band_net = BaseASPPNet(2, 64)
46
- self.stg1_high_band_net = BaseASPPNet(2, 64)
47
-
48
- self.stg2_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
49
- self.stg2_full_band_net = BaseASPPNet(32, 64)
50
-
51
- self.stg3_bridge = layers.Conv2DBNActiv(130, 64, 1, 1, 0)
52
- self.stg3_full_band_net = BaseASPPNet(64, 128)
53
-
54
- self.out = nn.Conv2d(128, 2, 1, bias=False)
55
- self.aux1_out = nn.Conv2d(64, 2, 1, bias=False)
56
- self.aux2_out = nn.Conv2d(64, 2, 1, bias=False)
57
-
58
- self.max_bin = n_fft // 2
59
- self.output_bin = n_fft // 2 + 1
60
-
61
- self.offset = 128
62
-
63
- def forward(self, x, aggressiveness=None):
64
- mix = x.detach()
65
- x = x.clone()
66
-
67
- x = x[:, :, :self.max_bin]
68
-
69
- bandw = x.size()[2] // 2
70
- aux1 = torch.cat([
71
- self.stg1_low_band_net(x[:, :, :bandw]),
72
- self.stg1_high_band_net(x[:, :, bandw:])
73
- ], dim=2)
74
-
75
- h = torch.cat([x, aux1], dim=1)
76
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
77
-
78
- h = torch.cat([x, aux1, aux2], dim=1)
79
- h = self.stg3_full_band_net(self.stg3_bridge(h))
80
-
81
- mask = torch.sigmoid(self.out(h))
82
- mask = F.pad(
83
- input=mask,
84
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
85
- mode='replicate')
86
-
87
- if self.training:
88
- aux1 = torch.sigmoid(self.aux1_out(aux1))
89
- aux1 = F.pad(
90
- input=aux1,
91
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
92
- mode='replicate')
93
- aux2 = torch.sigmoid(self.aux2_out(aux2))
94
- aux2 = F.pad(
95
- input=aux2,
96
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
97
- mode='replicate')
98
- return mask * mix, aux1 * mix, aux2 * mix
99
- else:
100
- if aggressiveness:
101
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
102
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
103
-
104
- return mask * mix
105
-
106
- def predict(self, x_mag, aggressiveness=None):
107
- h = self.forward(x_mag, aggressiveness)
108
-
109
- if self.offset > 0:
110
- h = h[:, :, :, self.offset:-self.offset]
111
- assert h.size()[3] > 0
112
-
113
- return h
 
uvr5_pack/lib_v5/nets_61968KB.py DELETED
@@ -1,112 +0,0 @@
1
- import torch
2
- from torch import nn
3
- import torch.nn.functional as F
4
-
5
- from uvr5_pack.lib_v5 import layers_123821KB as layers
6
-
7
-
8
- class BaseASPPNet(nn.Module):
9
-
10
- def __init__(self, nin, ch, dilations=(4, 8, 16)):
11
- super(BaseASPPNet, self).__init__()
12
- self.enc1 = layers.Encoder(nin, ch, 3, 2, 1)
13
- self.enc2 = layers.Encoder(ch, ch * 2, 3, 2, 1)
14
- self.enc3 = layers.Encoder(ch * 2, ch * 4, 3, 2, 1)
15
- self.enc4 = layers.Encoder(ch * 4, ch * 8, 3, 2, 1)
16
-
17
- self.aspp = layers.ASPPModule(ch * 8, ch * 16, dilations)
18
-
19
- self.dec4 = layers.Decoder(ch * (8 + 16), ch * 8, 3, 1, 1)
20
- self.dec3 = layers.Decoder(ch * (4 + 8), ch * 4, 3, 1, 1)
21
- self.dec2 = layers.Decoder(ch * (2 + 4), ch * 2, 3, 1, 1)
22
- self.dec1 = layers.Decoder(ch * (1 + 2), ch, 3, 1, 1)
23
-
24
- def __call__(self, x):
25
- h, e1 = self.enc1(x)
26
- h, e2 = self.enc2(h)
27
- h, e3 = self.enc3(h)
28
- h, e4 = self.enc4(h)
29
-
30
- h = self.aspp(h)
31
-
32
- h = self.dec4(h, e4)
33
- h = self.dec3(h, e3)
34
- h = self.dec2(h, e2)
35
- h = self.dec1(h, e1)
36
-
37
- return h
38
-
39
-
40
- class CascadedASPPNet(nn.Module):
41
-
42
- def __init__(self, n_fft):
43
- super(CascadedASPPNet, self).__init__()
44
- self.stg1_low_band_net = BaseASPPNet(2, 32)
45
- self.stg1_high_band_net = BaseASPPNet(2, 32)
46
-
47
- self.stg2_bridge = layers.Conv2DBNActiv(34, 16, 1, 1, 0)
48
- self.stg2_full_band_net = BaseASPPNet(16, 32)
49
-
50
- self.stg3_bridge = layers.Conv2DBNActiv(66, 32, 1, 1, 0)
51
- self.stg3_full_band_net = BaseASPPNet(32, 64)
52
-
53
- self.out = nn.Conv2d(64, 2, 1, bias=False)
54
- self.aux1_out = nn.Conv2d(32, 2, 1, bias=False)
55
- self.aux2_out = nn.Conv2d(32, 2, 1, bias=False)
56
-
57
- self.max_bin = n_fft // 2
58
- self.output_bin = n_fft // 2 + 1
59
-
60
- self.offset = 128
61
-
62
- def forward(self, x, aggressiveness=None):
63
- mix = x.detach()
64
- x = x.clone()
65
-
66
- x = x[:, :, :self.max_bin]
67
-
68
- bandw = x.size()[2] // 2
69
- aux1 = torch.cat([
70
- self.stg1_low_band_net(x[:, :, :bandw]),
71
- self.stg1_high_band_net(x[:, :, bandw:])
72
- ], dim=2)
73
-
74
- h = torch.cat([x, aux1], dim=1)
75
- aux2 = self.stg2_full_band_net(self.stg2_bridge(h))
76
-
77
- h = torch.cat([x, aux1, aux2], dim=1)
78
- h = self.stg3_full_band_net(self.stg3_bridge(h))
79
-
80
- mask = torch.sigmoid(self.out(h))
81
- mask = F.pad(
82
- input=mask,
83
- pad=(0, 0, 0, self.output_bin - mask.size()[2]),
84
- mode='replicate')
85
-
86
- if self.training:
87
- aux1 = torch.sigmoid(self.aux1_out(aux1))
88
- aux1 = F.pad(
89
- input=aux1,
90
- pad=(0, 0, 0, self.output_bin - aux1.size()[2]),
91
- mode='replicate')
92
- aux2 = torch.sigmoid(self.aux2_out(aux2))
93
- aux2 = F.pad(
94
- input=aux2,
95
- pad=(0, 0, 0, self.output_bin - aux2.size()[2]),
96
- mode='replicate')
97
- return mask * mix, aux1 * mix, aux2 * mix
98
- else:
99
- if aggressiveness:
100
- mask[:, :, :aggressiveness['split_bin']] = torch.pow(mask[:, :, :aggressiveness['split_bin']], 1 + aggressiveness['value'] / 3)
101
- mask[:, :, aggressiveness['split_bin']:] = torch.pow(mask[:, :, aggressiveness['split_bin']:], 1 + aggressiveness['value'])
102
-
103
- return mask * mix
104
-
105
- def predict(self, x_mag, aggressiveness=None):
106
- h = self.forward(x_mag, aggressiveness)
107
-
108
- if self.offset > 0:
109
- h = h[:, :, :, self.offset:-self.offset]
110
- assert h.size()[3] > 0
111
-
112
- return h
 
uvr5_pack/lib_v5/spec_utils.py DELETED
@@ -1,485 +0,0 @@
1
- import os, librosa
2
- import numpy as np
3
- import soundfile as sf
4
- from tqdm import tqdm
5
- import json, math, hashlib
6
-
7
- def crop_center(h1, h2):
8
- h1_shape = h1.size()
9
- h2_shape = h2.size()
10
-
11
- if h1_shape[3] == h2_shape[3]:
12
- return h1
13
- elif h1_shape[3] < h2_shape[3]:
14
- raise ValueError('h1_shape[3] must be greater than h2_shape[3]')
15
-
16
- # s_freq = (h2_shape[2] - h1_shape[2]) // 2
17
- # e_freq = s_freq + h1_shape[2]
18
- s_time = (h1_shape[3] - h2_shape[3]) // 2
19
- e_time = s_time + h2_shape[3]
20
- h1 = h1[:, :, :, s_time:e_time]
21
-
22
- return h1
23
-
24
-
25
- def wave_to_spectrogram(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
26
- if reverse:
27
- wave_left = np.flip(np.asfortranarray(wave[0]))
28
- wave_right = np.flip(np.asfortranarray(wave[1]))
29
- elif mid_side:
30
- wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
31
- wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
32
- elif mid_side_b2:
33
- wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
34
- wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
35
- else:
36
- wave_left = np.asfortranarray(wave[0])
37
- wave_right = np.asfortranarray(wave[1])
38
-
39
- spec_left = librosa.stft(wave_left, n_fft, hop_length=hop_length)
40
- spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
41
-
42
- spec = np.asfortranarray([spec_left, spec_right])
43
-
44
- return spec
45
-
46
-
47
- def wave_to_spectrogram_mt(wave, hop_length, n_fft, mid_side=False, mid_side_b2=False, reverse=False):
48
- import threading
49
-
50
- if reverse:
51
- wave_left = np.flip(np.asfortranarray(wave[0]))
52
- wave_right = np.flip(np.asfortranarray(wave[1]))
53
- elif mid_side:
54
- wave_left = np.asfortranarray(np.add(wave[0], wave[1]) / 2)
55
- wave_right = np.asfortranarray(np.subtract(wave[0], wave[1]))
56
- elif mid_side_b2:
57
- wave_left = np.asfortranarray(np.add(wave[1], wave[0] * .5))
58
- wave_right = np.asfortranarray(np.subtract(wave[0], wave[1] * .5))
59
- else:
60
- wave_left = np.asfortranarray(wave[0])
61
- wave_right = np.asfortranarray(wave[1])
62
-
63
- def run_thread(**kwargs):
64
- global spec_left
65
- spec_left = librosa.stft(**kwargs)
66
-
67
- thread = threading.Thread(target=run_thread, kwargs={'y': wave_left, 'n_fft': n_fft, 'hop_length': hop_length})
68
- thread.start()
69
- spec_right = librosa.stft(wave_right, n_fft, hop_length=hop_length)
70
- thread.join()
71
-
72
- spec = np.asfortranarray([spec_left, spec_right])
73
-
74
- return spec
75
-
76
-
77
- def combine_spectrograms(specs, mp):
78
- l = min([specs[i].shape[2] for i in specs])
79
- spec_c = np.zeros(shape=(2, mp.param['bins'] + 1, l), dtype=np.complex64)
80
- offset = 0
81
- bands_n = len(mp.param['band'])
82
-
83
- for d in range(1, bands_n + 1):
84
- h = mp.param['band'][d]['crop_stop'] - mp.param['band'][d]['crop_start']
85
- spec_c[:, offset:offset+h, :l] = specs[d][:, mp.param['band'][d]['crop_start']:mp.param['band'][d]['crop_stop'], :l]
86
- offset += h
87
-
88
- if offset > mp.param['bins']:
89
- raise ValueError('Too many bins')
90
-
91
- # lowpass filter
92
- if mp.param['pre_filter_start'] > 0: # and mp.param['band'][bands_n]['res_type'] in ['scipy', 'polyphase']:
93
- if bands_n == 1:
94
- spec_c = fft_lp_filter(spec_c, mp.param['pre_filter_start'], mp.param['pre_filter_stop'])
95
- else:
96
- gp = 1
97
- for b in range(mp.param['pre_filter_start'] + 1, mp.param['pre_filter_stop']):
98
- g = math.pow(10, -(b - mp.param['pre_filter_start']) * (3.5 - gp) / 20.0)
99
- gp = g
100
- spec_c[:, b, :] *= g
101
-
102
- return np.asfortranarray(spec_c)
103
-
104
-
105
- def spectrogram_to_image(spec, mode='magnitude'):
106
- if mode == 'magnitude':
107
- if np.iscomplexobj(spec):
108
- y = np.abs(spec)
109
- else:
110
- y = spec
111
- y = np.log10(y ** 2 + 1e-8)
112
- elif mode == 'phase':
113
- if np.iscomplexobj(spec):
114
- y = np.angle(spec)
115
- else:
116
- y = spec
117
-
118
- y -= y.min()
119
- y *= 255 / y.max()
120
- img = np.uint8(y)
121
-
122
- if y.ndim == 3:
123
- img = img.transpose(1, 2, 0)
124
- img = np.concatenate([
125
- np.max(img, axis=2, keepdims=True), img
126
- ], axis=2)
127
-
128
- return img
129
-
130
-
131
- def reduce_vocal_aggressively(X, y, softmask):
132
- v = X - y
133
- y_mag_tmp = np.abs(y)
134
- v_mag_tmp = np.abs(v)
135
-
136
- v_mask = v_mag_tmp > y_mag_tmp
137
- y_mag = np.clip(y_mag_tmp - v_mag_tmp * v_mask * softmask, 0, np.inf)
138
-
139
- return y_mag * np.exp(1.j * np.angle(y))
140
-
141
-
142
- def mask_silence(mag, ref, thres=0.2, min_range=64, fade_size=32):
143
- if min_range < fade_size * 2:
144
- raise ValueError('min_range must be >= fade_size * 2')
145
-
146
- mag = mag.copy()
147
-
148
- idx = np.where(ref.mean(axis=(0, 1)) < thres)[0]
149
- starts = np.insert(idx[np.where(np.diff(idx) != 1)[0] + 1], 0, idx[0])
150
- ends = np.append(idx[np.where(np.diff(idx) != 1)[0]], idx[-1])
151
- uninformative = np.where(ends - starts > min_range)[0]
152
- if len(uninformative) > 0:
153
- starts = starts[uninformative]
154
- ends = ends[uninformative]
155
- old_e = None
156
- for s, e in zip(starts, ends):
157
- if old_e is not None and s - old_e < fade_size:
158
- s = old_e - fade_size * 2
159
-
160
- if s != 0:
161
- weight = np.linspace(0, 1, fade_size)
162
- mag[:, :, s:s + fade_size] += weight * ref[:, :, s:s + fade_size]
163
- else:
164
- s -= fade_size
165
-
166
- if e != mag.shape[2]:
167
- weight = np.linspace(1, 0, fade_size)
168
- mag[:, :, e - fade_size:e] += weight * ref[:, :, e - fade_size:e]
169
- else:
170
- e += fade_size
171
-
172
- mag[:, :, s + fade_size:e - fade_size] += ref[:, :, s + fade_size:e - fade_size]
173
- old_e = e
174
-
175
- return mag
176
-
177
-
178
- def align_wave_head_and_tail(a, b):
179
- l = min([a[0].size, b[0].size])
180
-
181
- return a[:, :l], b[:, :l]  # trim both stereo signals to the shorter length
182
-
183
-
184
- def cache_or_load(mix_path, inst_path, mp):
185
- mix_basename = os.path.splitext(os.path.basename(mix_path))[0]
186
- inst_basename = os.path.splitext(os.path.basename(inst_path))[0]
187
-
188
- cache_dir = 'mph{}'.format(hashlib.sha1(json.dumps(mp.param, sort_keys=True).encode('utf-8')).hexdigest())
189
- mix_cache_dir = os.path.join('cache', cache_dir)
190
- inst_cache_dir = os.path.join('cache', cache_dir)
191
-
192
- os.makedirs(mix_cache_dir, exist_ok=True)
193
- os.makedirs(inst_cache_dir, exist_ok=True)
194
-
195
- mix_cache_path = os.path.join(mix_cache_dir, mix_basename + '.npy')
196
- inst_cache_path = os.path.join(inst_cache_dir, inst_basename + '.npy')
197
-
198
- if os.path.exists(mix_cache_path) and os.path.exists(inst_cache_path):
199
- X_spec_m = np.load(mix_cache_path)
200
- y_spec_m = np.load(inst_cache_path)
201
- else:
202
- X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
203
-
204
- for d in range(len(mp.param['band']), 0, -1):
205
- bp = mp.param['band'][d]
206
-
207
- if d == len(mp.param['band']): # high-end band
208
- X_wave[d], _ = librosa.load(
209
- mix_path, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
210
- y_wave[d], _ = librosa.load(
211
- inst_path, bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
212
- else: # lower bands
213
- X_wave[d] = librosa.resample(X_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
214
- y_wave[d] = librosa.resample(y_wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
215
-
216
- X_wave[d], y_wave[d] = align_wave_head_and_tail(X_wave[d], y_wave[d])
217
-
218
- X_spec_s[d] = wave_to_spectrogram(X_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
219
- y_spec_s[d] = wave_to_spectrogram(y_wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
220
-
221
- del X_wave, y_wave
222
-
223
- X_spec_m = combine_spectrograms(X_spec_s, mp)
224
- y_spec_m = combine_spectrograms(y_spec_s, mp)
225
-
226
- if X_spec_m.shape != y_spec_m.shape:
227
- raise ValueError('The combined spectrograms have different shapes: ' + mix_path)
228
-
229
- _, ext = os.path.splitext(mix_path)
230
-
231
- np.save(mix_cache_path, X_spec_m)
232
- np.save(inst_cache_path, y_spec_m)
233
-
234
- return X_spec_m, y_spec_m
235
-
236
-
237
- def spectrogram_to_wave(spec, hop_length, mid_side, mid_side_b2, reverse):
238
- spec_left = np.asfortranarray(spec[0])
239
- spec_right = np.asfortranarray(spec[1])
240
-
241
- wave_left = librosa.istft(spec_left, hop_length=hop_length)
242
- wave_right = librosa.istft(spec_right, hop_length=hop_length)
243
-
244
- if reverse:
245
- return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
246
- elif mid_side:
247
- return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
248
- elif mid_side_b2:
249
- return np.asfortranarray([np.add(wave_right / 1.25, .4 * wave_left), np.subtract(wave_left / 1.25, .4 * wave_right)])
250
- else:
251
- return np.asfortranarray([wave_left, wave_right])
252
-
253
-
254
- def spectrogram_to_wave_mt(spec, hop_length, mid_side, reverse, mid_side_b2):
255
- import threading
256
-
257
- spec_left = np.asfortranarray(spec[0])
258
- spec_right = np.asfortranarray(spec[1])
259
-
260
- def run_thread(**kwargs):
261
- global wave_left
262
- wave_left = librosa.istft(**kwargs)
263
-
264
- thread = threading.Thread(target=run_thread, kwargs={'stft_matrix': spec_left, 'hop_length': hop_length})
265
- thread.start()
266
- wave_right = librosa.istft(spec_right, hop_length=hop_length)
267
- thread.join()
268
-
269
- if reverse:
270
- return np.asfortranarray([np.flip(wave_left), np.flip(wave_right)])
271
- elif mid_side:
272
- return np.asfortranarray([np.add(wave_left, wave_right / 2), np.subtract(wave_left, wave_right / 2)])
273
- elif mid_side_b2:
274
- return np.asfortranarray([np.add(wave_right / 1.25, .4 * wave_left), np.subtract(wave_left / 1.25, .4 * wave_right)])
275
- else:
276
- return np.asfortranarray([wave_left, wave_right])
277
-
278
-
279
- def cmb_spectrogram_to_wave(spec_m, mp, extra_bins_h=None, extra_bins=None):
280
- wave_band = {}
281
- bands_n = len(mp.param['band'])
282
- offset = 0
283
-
284
- for d in range(1, bands_n + 1):
285
- bp = mp.param['band'][d]
286
- spec_s = np.ndarray(shape=(2, bp['n_fft'] // 2 + 1, spec_m.shape[2]), dtype=complex)
287
- h = bp['crop_stop'] - bp['crop_start']
288
- spec_s[:, bp['crop_start']:bp['crop_stop'], :] = spec_m[:, offset:offset+h, :]
289
-
290
- offset += h
291
- if d == bands_n: # higher
292
- if extra_bins_h: # if --high_end_process bypass
293
- max_bin = bp['n_fft'] // 2
294
- spec_s[:, max_bin-extra_bins_h:max_bin, :] = extra_bins[:, :extra_bins_h, :]
295
- if bp['hpf_start'] > 0:
296
- spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
297
- if bands_n == 1:
298
- wave = spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
299
- else:
300
- wave = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']))
301
- else:
302
- sr = mp.param['band'][d+1]['sr']
303
- if d == 1: # lower
304
- spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
305
- wave = librosa.resample(spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']), bp['sr'], sr, res_type="sinc_fastest")
306
- else: # mid
307
- spec_s = fft_hp_filter(spec_s, bp['hpf_start'], bp['hpf_stop'] - 1)
308
- spec_s = fft_lp_filter(spec_s, bp['lpf_start'], bp['lpf_stop'])
309
- wave2 = np.add(wave, spectrogram_to_wave(spec_s, bp['hl'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse']))
310
- # wave = librosa.core.resample(wave2, bp['sr'], sr, res_type="sinc_fastest")
311
- wave = librosa.core.resample(wave2, bp['sr'], sr,res_type='scipy')
312
-
313
- return wave.T
314
-
315
-
316
- def fft_lp_filter(spec, bin_start, bin_stop):
317
- g = 1.0
318
- for b in range(bin_start, bin_stop):
319
- g -= 1 / (bin_stop - bin_start)
320
- spec[:, b, :] = g * spec[:, b, :]
321
-
322
- spec[:, bin_stop:, :] *= 0
323
-
324
- return spec
325
-
326
-
327
- def fft_hp_filter(spec, bin_start, bin_stop):
328
- g = 1.0
329
- for b in range(bin_start, bin_stop, -1):
330
- g -= 1 / (bin_start - bin_stop)
331
- spec[:, b, :] = g * spec[:, b, :]
332
-
333
- spec[:, 0:bin_stop+1, :] *= 0
334
-
335
- return spec
336
-
337
-
338
- def mirroring(a, spec_m, input_high_end, mp):
339
- if 'mirroring' == a:
340
- mirror = np.flip(np.abs(spec_m[:, mp.param['pre_filter_start']-10-input_high_end.shape[1]:mp.param['pre_filter_start']-10, :]), 1)
341
- mirror = mirror * np.exp(1.j * np.angle(input_high_end))
342
-
343
- return np.where(np.abs(input_high_end) <= np.abs(mirror), input_high_end, mirror)
344
-
345
- if 'mirroring2' == a:
346
- mirror = np.flip(np.abs(spec_m[:, mp.param['pre_filter_start']-10-input_high_end.shape[1]:mp.param['pre_filter_start']-10, :]), 1)
347
- mi = np.multiply(mirror, input_high_end * 1.7)
348
-
349
- return np.where(np.abs(input_high_end) <= np.abs(mi), input_high_end, mi)
350
-
351
-
352
- def ensembling(a, specs):
353
- for i in range(1, len(specs)):
354
- if i == 1:
355
- spec = specs[0]
356
-
357
- ln = min([spec.shape[2], specs[i].shape[2]])
358
- spec = spec[:,:,:ln]
359
- specs[i] = specs[i][:,:,:ln]
360
-
361
- if 'min_mag' == a:
362
- spec = np.where(np.abs(specs[i]) <= np.abs(spec), specs[i], spec)
363
- if 'max_mag' == a:
364
- spec = np.where(np.abs(specs[i]) >= np.abs(spec), specs[i], spec)
365
-
366
- return spec
367
-
368
- def stft(wave, nfft, hl):
369
- wave_left = np.asfortranarray(wave[0])
370
- wave_right = np.asfortranarray(wave[1])
371
- spec_left = librosa.stft(wave_left, nfft, hop_length=hl)
372
- spec_right = librosa.stft(wave_right, nfft, hop_length=hl)
373
- spec = np.asfortranarray([spec_left, spec_right])
374
-
375
- return spec
376
-
377
- def istft(spec, hl):
378
- spec_left = np.asfortranarray(spec[0])
379
- spec_right = np.asfortranarray(spec[1])
380
-
381
- wave_left = librosa.istft(spec_left, hop_length=hl)
382
- wave_right = librosa.istft(spec_right, hop_length=hl)
383
- wave = np.asfortranarray([wave_left, wave_right])
- return wave
384
-
385
-
386
- if __name__ == "__main__":
387
- import cv2
388
- import sys
389
- import time
390
- import argparse
391
- from model_param_init import ModelParameters
392
-
393
- p = argparse.ArgumentParser()
394
- p.add_argument('--algorithm', '-a', type=str, choices=['invert', 'invert_p', 'min_mag', 'max_mag', 'deep', 'align'], default='min_mag')
395
- p.add_argument('--model_params', '-m', type=str, default=os.path.join('modelparams', '1band_sr44100_hl512.json'))
396
- p.add_argument('--output_name', '-o', type=str, default='output')
397
- p.add_argument('--vocals_only', '-v', action='store_true')
398
- p.add_argument('input', nargs='+')
399
- args = p.parse_args()
400
-
401
- start_time = time.time()
402
-
403
- if args.algorithm.startswith('invert') and len(args.input) != 2:
404
- raise ValueError('There should be two input files.')
405
-
406
- if not args.algorithm.startswith('invert') and len(args.input) < 2:
407
- raise ValueError('There must be at least two input files.')
408
-
409
- wave, specs = {}, {}
410
- mp = ModelParameters(args.model_params)
411
-
412
- for i in range(len(args.input)):
413
- spec = {}
414
-
415
- for d in range(len(mp.param['band']), 0, -1):
416
- bp = mp.param['band'][d]
417
-
418
- if d == len(mp.param['band']): # high-end band
419
- wave[d], _ = librosa.load(
420
- args.input[i], bp['sr'], False, dtype=np.float32, res_type=bp['res_type'])
421
-
422
- if len(wave[d].shape) == 1: # mono to stereo
423
- wave[d] = np.array([wave[d], wave[d]])
424
- else: # lower bands
425
- wave[d] = librosa.resample(wave[d+1], mp.param['band'][d+1]['sr'], bp['sr'], res_type=bp['res_type'])
426
-
427
- spec[d] = wave_to_spectrogram(wave[d], bp['hl'], bp['n_fft'], mp.param['mid_side'], mp.param['mid_side_b2'], mp.param['reverse'])
428
-
429
- specs[i] = combine_spectrograms(spec, mp)
430
-
431
- del wave
432
-
433
- if args.algorithm == 'deep':
434
- d_spec = np.where(np.abs(specs[0]) <= np.abs(specs[1]), specs[0], specs[1])
435
- v_spec = d_spec - specs[1]
436
- sf.write(os.path.join('{}.wav'.format(args.output_name)), cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr'])
437
-
438
- if args.algorithm.startswith('invert'):
439
- ln = min([specs[0].shape[2], specs[1].shape[2]])
440
- specs[0] = specs[0][:,:,:ln]
441
- specs[1] = specs[1][:,:,:ln]
442
-
443
- if 'invert_p' == args.algorithm:
444
- X_mag = np.abs(specs[0])
445
- y_mag = np.abs(specs[1])
446
- max_mag = np.where(X_mag >= y_mag, X_mag, y_mag)
447
- v_spec = specs[1] - max_mag * np.exp(1.j * np.angle(specs[0]))
448
- else:
449
- specs[1] = reduce_vocal_aggressively(specs[0], specs[1], 0.2)
450
- v_spec = specs[0] - specs[1]
451
-
452
- if not args.vocals_only:
453
- X_mag = np.abs(specs[0])
454
- y_mag = np.abs(specs[1])
455
- v_mag = np.abs(v_spec)
456
-
457
- X_image = spectrogram_to_image(X_mag)
458
- y_image = spectrogram_to_image(y_mag)
459
- v_image = spectrogram_to_image(v_mag)
460
-
461
- cv2.imwrite('{}_X.png'.format(args.output_name), X_image)
462
- cv2.imwrite('{}_y.png'.format(args.output_name), y_image)
463
- cv2.imwrite('{}_v.png'.format(args.output_name), v_image)
464
-
465
- sf.write('{}_X.wav'.format(args.output_name), cmb_spectrogram_to_wave(specs[0], mp), mp.param['sr'])
466
- sf.write('{}_y.wav'.format(args.output_name), cmb_spectrogram_to_wave(specs[1], mp), mp.param['sr'])
467
-
468
- sf.write('{}_v.wav'.format(args.output_name), cmb_spectrogram_to_wave(v_spec, mp), mp.param['sr'])
469
- else:
470
- if not args.algorithm == 'deep':
471
- sf.write(os.path.join('ensembled','{}.wav'.format(args.output_name)), cmb_spectrogram_to_wave(ensembling(args.algorithm, specs), mp), mp.param['sr'])
472
-
473
- if args.algorithm == 'align':
474
-
475
- trackalignment = [
476
- {
477
- 'file1':'"{}"'.format(args.input[0]),
478
- 'file2':'"{}"'.format(args.input[1])
479
- }
480
- ]
481
-
482
- for i,e in tqdm(enumerate(trackalignment), desc="Performing Alignment..."):
483
- os.system(f"python lib/align_tracks.py {e['file1']} {e['file2']}")
484
-
485
- #print('Total time: {0:.{1}f}s'.format(time.time() - start_time, 1))
 
uvr5_pack/utils.py DELETED
@@ -1,242 +0,0 @@
1
- import torch
2
- import numpy as np
3
- from tqdm import tqdm
4
-
5
- def make_padding(width, cropsize, offset):
6
- left = offset
7
- roi_size = cropsize - left * 2
8
- if roi_size == 0:
9
- roi_size = cropsize
10
- right = roi_size - (width % roi_size) + left
11
-
12
- return left, right, roi_size
13
- def inference(X_spec, device, model, aggressiveness, data):
14
- '''
15
- data : dict of configs
16
- '''
17
-
18
- def _execute(X_mag_pad, roi_size, n_window, device, model, aggressiveness,is_half=True):
19
- model.eval()
20
- with torch.no_grad():
21
- preds = []
22
-
23
- iterations = [n_window]
24
-
25
- total_iterations = sum(iterations)
26
- for i in tqdm(range(n_window)):
27
- start = i * roi_size
28
- X_mag_window = X_mag_pad[None, :, :, start:start + data['window_size']]
29
- X_mag_window = torch.from_numpy(X_mag_window)
30
- if(is_half==True):X_mag_window=X_mag_window.half()
31
- X_mag_window=X_mag_window.to(device)
32
-
33
- pred = model.predict(X_mag_window, aggressiveness)
34
-
35
- pred = pred.detach().cpu().numpy()
36
- preds.append(pred[0])
37
-
38
- pred = np.concatenate(preds, axis=2)
39
- return pred
40
-
41
- def preprocess(X_spec):
42
- X_mag = np.abs(X_spec)
43
- X_phase = np.angle(X_spec)
44
-
45
- return X_mag, X_phase
46
-
47
- X_mag, X_phase = preprocess(X_spec)
48
-
49
- coef = X_mag.max()
50
- X_mag_pre = X_mag / coef
51
-
52
- n_frame = X_mag_pre.shape[2]
53
- pad_l, pad_r, roi_size = make_padding(n_frame,
54
- data['window_size'], model.offset)
55
- n_window = int(np.ceil(n_frame / roi_size))
56
-
57
- X_mag_pad = np.pad(
58
- X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
59
-
60
- if(list(model.state_dict().values())[0].dtype==torch.float16):is_half=True
61
- else:is_half=False
62
- pred = _execute(X_mag_pad, roi_size, n_window,
63
- device, model, aggressiveness,is_half)
64
- pred = pred[:, :, :n_frame]
65
-
66
- if data['tta']:
67
- pad_l += roi_size // 2
68
- pad_r += roi_size // 2
69
- n_window += 1
70
-
71
- X_mag_pad = np.pad(
72
- X_mag_pre, ((0, 0), (0, 0), (pad_l, pad_r)), mode='constant')
73
-
74
- pred_tta = _execute(X_mag_pad, roi_size, n_window,
75
- device, model, aggressiveness,is_half)
76
- pred_tta = pred_tta[:, :, roi_size // 2:]
77
- pred_tta = pred_tta[:, :, :n_frame]
78
-
79
- return (pred + pred_tta) * 0.5 * coef, X_mag, np.exp(1.j * X_phase)
80
- else:
81
- return pred * coef, X_mag, np.exp(1.j * X_phase)
82
-
83
-
84
-
85
- def _get_name_params(model_path , model_hash):
86
- ModelName = model_path
87
- if model_hash == '47939caf0cfe52a0e81442b85b971dfd':
88
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
89
- param_name_auto=str('4band_44100')
90
- if model_hash == '4e4ecb9764c50a8c414fee6e10395bbe':
91
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2.json')
92
- param_name_auto=str('4band_v2')
93
- if model_hash == 'ca106edd563e034bde0bdec4bb7a4b36':
94
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2.json')
95
- param_name_auto=str('4band_v2')
96
- if model_hash == 'e60a1e84803ce4efc0a6551206cc4b71':
97
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
98
- param_name_auto=str('4band_44100')
99
- if model_hash == 'a82f14e75892e55e994376edbf0c8435':
100
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
101
- param_name_auto=str('4band_44100')
102
- if model_hash == '6dd9eaa6f0420af9f1d403aaafa4cc06':
103
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json')
104
- param_name_auto=str('4band_v2_sn')
105
- if model_hash == '08611fb99bd59eaa79ad27c58d137727':
106
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json')
107
- param_name_auto=str('4band_v2_sn')
108
- if model_hash == '5c7bbca45a187e81abbbd351606164e5':
109
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json')
110
- param_name_auto=str('3band_44100_msb2')
111
- if model_hash == 'd6b2cb685a058a091e5e7098192d3233':
112
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json')
113
- param_name_auto=str('3band_44100_msb2')
114
- if model_hash == 'c1b9f38170a7c90e96f027992eb7c62b':
115
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
116
- param_name_auto=str('4band_44100')
117
- if model_hash == 'c3448ec923fa0edf3d03a19e633faa53':
118
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
119
- param_name_auto=str('4band_44100')
120
- if model_hash == '68aa2c8093d0080704b200d140f59e54':
121
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100.json')
122
- param_name_auto=str('3band_44100.json')
123
- if model_hash == 'fdc83be5b798e4bd29fe00fe6600e147':
124
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json')
125
- param_name_auto=str('3band_44100_mid.json')
126
- if model_hash == '2ce34bc92fd57f55db16b7a4def3d745':
127
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json')
128
- param_name_auto=str('3band_44100_mid.json')
129
- if model_hash == '52fdca89576f06cf4340b74a4730ee5f':
130
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
131
- param_name_auto=str('4band_44100.json')
132
- if model_hash == '41191165b05d38fc77f072fa9e8e8a30':
133
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
134
- param_name_auto=str('4band_44100.json')
135
- if model_hash == '89e83b511ad474592689e562d5b1f80e':
136
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/2band_32000.json')
137
- param_name_auto=str('2band_32000.json')
138
- if model_hash == '0b954da81d453b716b114d6d7c95177f':
139
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/2band_32000.json')
140
- param_name_auto=str('2band_32000.json')
141
-
142
- #v4 Models
143
- if model_hash == '6a00461c51c2920fd68937d4609ed6c8':
144
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json')
145
- param_name_auto=str('1band_sr16000_hl512')
146
- if model_hash == '0ab504864d20f1bd378fe9c81ef37140':
147
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json')
148
- param_name_auto=str('1band_sr32000_hl512')
149
- if model_hash == '7dd21065bf91c10f7fccb57d7d83b07f':
150
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json')
151
- param_name_auto=str('1band_sr32000_hl512')
152
- if model_hash == '80ab74d65e515caa3622728d2de07d23':
153
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json')
154
- param_name_auto=str('1band_sr32000_hl512')
155
- if model_hash == 'edc115e7fc523245062200c00caa847f':
156
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json')
157
- param_name_auto=str('1band_sr33075_hl384')
158
- if model_hash == '28063e9f6ab5b341c5f6d3c67f2045b7':
159
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json')
160
- param_name_auto=str('1band_sr33075_hl384')
161
- if model_hash == 'b58090534c52cbc3e9b5104bad666ef2':
162
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json')
163
- param_name_auto=str('1band_sr44100_hl512')
164
- if model_hash == '0cdab9947f1b0928705f518f3c78ea8f':
165
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json')
166
- param_name_auto=str('1band_sr44100_hl512')
167
- if model_hash == 'ae702fed0238afb5346db8356fe25f13':
168
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json')
169
- param_name_auto=str('1band_sr44100_hl1024')
170
- #User Models
171
-
172
- #1 Band
173
- if '1band_sr16000_hl512' in ModelName:
174
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr16000_hl512.json')
175
- param_name_auto=str('1band_sr16000_hl512')
176
- if '1band_sr32000_hl512' in ModelName:
177
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr32000_hl512.json')
178
- param_name_auto=str('1band_sr32000_hl512')
179
- if '1band_sr33075_hl384' in ModelName:
180
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr33075_hl384.json')
181
- param_name_auto=str('1band_sr33075_hl384')
182
- if '1band_sr44100_hl256' in ModelName:
183
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl256.json')
184
- param_name_auto=str('1band_sr44100_hl256')
185
- if '1band_sr44100_hl512' in ModelName:
186
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl512.json')
187
- param_name_auto=str('1band_sr44100_hl512')
188
- if '1band_sr44100_hl1024' in ModelName:
189
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/1band_sr44100_hl1024.json')
190
- param_name_auto=str('1band_sr44100_hl1024')
191
-
192
- #2 Band
193
- if '2band_44100_lofi' in ModelName:
194
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/2band_44100_lofi.json')
195
- param_name_auto=str('2band_44100_lofi')
196
- if '2band_32000' in ModelName:
197
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/2band_32000.json')
198
- param_name_auto=str('2band_32000')
199
- if '2band_48000' in ModelName:
200
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/2band_48000.json')
201
- param_name_auto=str('2band_48000')
202
-
203
- #3 Band
204
- if '3band_44100' in ModelName:
205
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100.json')
206
- param_name_auto=str('3band_44100')
207
- if '3band_44100_mid' in ModelName:
208
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_mid.json')
209
- param_name_auto=str('3band_44100_mid')
210
- if '3band_44100_msb2' in ModelName:
211
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/3band_44100_msb2.json')
212
- param_name_auto=str('3band_44100_msb2')
213
-
214
- #4 Band
215
- if '4band_44100' in ModelName:
216
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100.json')
217
- param_name_auto=str('4band_44100')
218
- if '4band_44100_mid' in ModelName:
219
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100_mid.json')
220
- param_name_auto=str('4band_44100_mid')
221
- if '4band_44100_msb' in ModelName:
222
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100_msb.json')
223
- param_name_auto=str('4band_44100_msb')
224
- if '4band_44100_msb2' in ModelName:
225
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100_msb2.json')
226
- param_name_auto=str('4band_44100_msb2')
227
- if '4band_44100_reverse' in ModelName:
228
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100_reverse.json')
229
- param_name_auto=str('4band_44100_reverse')
230
- if '4band_44100_sw' in ModelName:
231
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_44100_sw.json')
232
- param_name_auto=str('4band_44100_sw')
233
- if '4band_v2' in ModelName:
234
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2.json')
235
- param_name_auto=str('4band_v2')
236
- if '4band_v2_sn' in ModelName:
237
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/4band_v2_sn.json')
238
- param_name_auto=str('4band_v2_sn')
239
- if 'tmodelparam' in ModelName:
240
- model_params_auto=str('runtime/Lib/site-packages/uvr5_pack/lib_v5/modelparams/tmodelparam.json')
241
- param_name_auto=str('User Model Param Set')
242
- return param_name_auto, model_params_auto