gosummer committed on
Commit
d958504
·
verified ·
1 Parent(s): 69d5c26

Delete MDX23v24

Browse files
MDX23v24/config_vocals_segm_models.yaml DELETED
@@ -1,48 +0,0 @@
1
- audio:
2
- chunk_size: 261632
3
- dim_f: 4096
4
- dim_t: 512
5
- hop_length: 512
6
- n_fft: 8192
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.001
10
-
11
- model:
12
- encoder_name: tu-maxvit_large_tf_512 # look here for possibilities: https://github.com/qubvel/segmentation_models.pytorch#encoders-
13
- decoder_type: unet # unet, fpn
14
- act: gelu
15
- num_channels: 128
16
- num_subbands: 8
17
-
18
- training:
19
- batch_size: 8
20
- gradient_accumulation_steps: 1
21
- grad_clip: 0
22
- instruments:
23
- - vocals
24
- - other
25
- lr: 5.0e-05
26
- patience: 2
27
- reduce_factor: 0.95
28
- target_instrument: null
29
- num_epochs: 1000
30
- num_steps: 2000
31
- augmentation: false # enable augmentations by audiomentations and pedalboard
32
- augmentation_type: simple1
33
- use_mp3_compress: false # Deprecated
34
- augmentation_mix: true # Mix several stems of the same type with some probability
35
- augmentation_loudness: true # randomly change loudness of each stem
36
- augmentation_loudness_type: 1 # Type 1 or 2
37
- augmentation_loudness_min: 0.5
38
- augmentation_loudness_max: 1.5
39
- q: 0.95
40
- coarse_loss_clip: true
41
- ema_momentum: 0.999
42
- optimizer: adamw
43
- other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
44
-
45
- inference:
46
- batch_size: 1
47
- dim_t: 512
48
- num_overlap: 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MDX23v24/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c44aad6e89377d68458a95c6356730f14a1c742a10b6426608c50199e86fb04
3
- size 850242572
 
 
 
 
MDX23v24/model_2_stem_061321.yaml DELETED
@@ -1,36 +0,0 @@
1
- audio:
2
- chunk_size: 260096
3
- dim_f: 4096
4
- dim_t: 256
5
- hop_length: 2048
6
- n_fft: 12288
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.001
10
- model:
11
- act: gelu
12
- bottleneck_factor: 4
13
- growth: 64
14
- norm: InstanceNorm
15
- num_blocks_per_scale: 2
16
- num_channels: 128
17
- num_scales: 5
18
- num_subbands: 4
19
- scale:
20
- - 2
21
- - 2
22
- name: epoch_10.ckpt
23
- training:
24
- batch_size: 16
25
- grad_clip: 0
26
- instruments:
27
- - Vocals
28
- - Instrumental
29
- lr: 5.0e-05
30
- target_instrument: null
31
- num_epochs: 100
32
- num_steps: 1000
33
- inference:
34
- batch_size: 1
35
- dim_t: 256
36
- num_overlap: 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MDX23v24/model_2_stem_full_band_8k.yaml DELETED
@@ -1,43 +0,0 @@
1
- audio:
2
- chunk_size: 261120
3
- dim_f: 4096
4
- dim_t: 256
5
- hop_length: 1024
6
- n_fft: 8192
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.001
10
- model:
11
- act: gelu
12
- bottleneck_factor: 4
13
- growth: 128
14
- norm: InstanceNorm
15
- num_blocks_per_scale: 2
16
- num_channels: 128
17
- num_scales: 5
18
- num_subbands: 4
19
- scale:
20
- - 2
21
- - 2
22
- training:
23
- batch_size: 6
24
- grad_clip: 0
25
- instruments:
26
- - Vocals
27
- - Instrumental
28
- lr: 1.0e-05
29
- patience: 2
30
- reduce_factor: 0.95
31
- target_instrument: null
32
- num_epochs: 1000
33
- num_steps: 1000
34
- augmentation: 1
35
- augmentation_type: simple1
36
- augmentation_mix: true
37
- q: 0.95
38
- coarse_loss_clip: true
39
- ema_momentum: 0.999
40
- inference:
41
- batch_size: 1
42
- dim_t: 256
43
- num_overlap: 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MDX23v24/model_bs_roformer_ep_317_sdr_12.9755.yaml DELETED
@@ -1,133 +0,0 @@
1
- audio:
2
- chunk_size: 352800
3
- dim_f: 1024
4
- dim_t: 801 # don't work (use in model)
5
- hop_length: 441 # don't work (use in model)
6
- n_fft: 2048
7
- num_channels: 2
8
- sample_rate: 44100
9
- min_mean_abs: 0.001
10
-
11
- model:
12
- dim: 512
13
- depth: 12
14
- stereo: true
15
- num_stems: 1
16
- time_transformer_depth: 1
17
- freq_transformer_depth: 1
18
- freqs_per_bands: !!python/tuple
19
- - 2
20
- - 2
21
- - 2
22
- - 2
23
- - 2
24
- - 2
25
- - 2
26
- - 2
27
- - 2
28
- - 2
29
- - 2
30
- - 2
31
- - 2
32
- - 2
33
- - 2
34
- - 2
35
- - 2
36
- - 2
37
- - 2
38
- - 2
39
- - 2
40
- - 2
41
- - 2
42
- - 2
43
- - 4
44
- - 4
45
- - 4
46
- - 4
47
- - 4
48
- - 4
49
- - 4
50
- - 4
51
- - 4
52
- - 4
53
- - 4
54
- - 4
55
- - 12
56
- - 12
57
- - 12
58
- - 12
59
- - 12
60
- - 12
61
- - 12
62
- - 12
63
- - 24
64
- - 24
65
- - 24
66
- - 24
67
- - 24
68
- - 24
69
- - 24
70
- - 24
71
- - 48
72
- - 48
73
- - 48
74
- - 48
75
- - 48
76
- - 48
77
- - 48
78
- - 48
79
- - 128
80
- - 129
81
- dim_head: 64
82
- heads: 8
83
- attn_dropout: 0.1
84
- ff_dropout: 0.1
85
- flash_attn: true
86
- dim_freqs_in: 1025
87
- stft_n_fft: 2048
88
- stft_hop_length: 441
89
- stft_win_length: 2048
90
- stft_normalized: false
91
- mask_estimator_depth: 2
92
- multi_stft_resolution_loss_weight: 1.0
93
- multi_stft_resolutions_window_sizes: !!python/tuple
94
- - 4096
95
- - 2048
96
- - 1024
97
- - 512
98
- - 256
99
- multi_stft_hop_size: 147
100
- multi_stft_normalized: False
101
-
102
- training:
103
- batch_size: 16
104
- gradient_accumulation_steps: 1
105
- grad_clip: 0
106
- instruments:
107
- - Vocals
108
- - Instrumental
109
- lr: 5.0e-05
110
- patience: 2
111
- reduce_factor: 0.95
112
- target_instrument: Vocals
113
- num_epochs: 1000
114
- num_steps: 1000
115
- augmentation: false # enable augmentations by audiomentations and pedalboard
116
- augmentation_type: simple1
117
- use_mp3_compress: false # Deprecated
118
- augmentation_mix: true # Mix several stems of the same type with some probability
119
- augmentation_loudness: true # randomly change loudness of each stem
120
- augmentation_loudness_type: 1 # Type 1 or 2
121
- augmentation_loudness_min: 0.5
122
- augmentation_loudness_max: 1.5
123
- q: 0.95
124
- coarse_loss_clip: true
125
- ema_momentum: 0.999
126
- optimizer: adam
127
- other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
128
- use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
129
-
130
- inference:
131
- batch_size: 1
132
- dim_t: 801
133
- num_overlap: 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
MDX23v24/model_vocals_segm_models_sdr_9.77.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cb6e969309f96602318fcf5970a6973899db86e5fd9d8f9cf8f15bacdd299bb
3
- size 863683537