Delete MDX23v24

Browse files

Files changed (6) hide show

MDX23v24/config_vocals_segm_models.yaml +0 -48
MDX23v24/model.safetensors +0 -3
MDX23v24/model_2_stem_061321.yaml +0 -36
MDX23v24/model_2_stem_full_band_8k.yaml +0 -43
MDX23v24/model_bs_roformer_ep_317_sdr_12.9755.yaml +0 -133
MDX23v24/model_vocals_segm_models_sdr_9.77.ckpt +0 -3

MDX23v24/config_vocals_segm_models.yaml DELETED Viewed

@@ -1,48 +0,0 @@
-audio:
-  chunk_size: 261632
-  dim_f: 4096
-  dim_t: 512
-  hop_length: 512
-  n_fft: 8192
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  encoder_name: tu-maxvit_large_tf_512 # look here for possibilities: https://github.com/qubvel/segmentation_models.pytorch#encoders-
-  decoder_type: unet # unet, fpn
-  act: gelu
-  num_channels: 128
-  num_subbands: 8
-training:
-  batch_size: 8
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - vocals
-  - other
-  lr: 5.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 2000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: simple1
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: true # Mix several stems of the same type with some probability
-  augmentation_loudness: true # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0.5
-  augmentation_loudness_max: 1.5
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adamw
-  other_fix: true # it's needed for checking on multisong dataset if other is actually instrumental
-inference:
-  batch_size: 1
-  dim_t: 512
-  num_overlap: 4

MDX23v24/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7c44aad6e89377d68458a95c6356730f14a1c742a10b6426608c50199e86fb04
-size 850242572

MDX23v24/model_2_stem_061321.yaml DELETED Viewed

@@ -1,36 +0,0 @@
-audio:
-  chunk_size: 260096
-  dim_f: 4096
-  dim_t: 256
-  hop_length: 2048
-  n_fft: 12288
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  act: gelu
-  bottleneck_factor: 4
-  growth: 64
-  norm: InstanceNorm
-  num_blocks_per_scale: 2
-  num_channels: 128
-  num_scales: 5
-  num_subbands: 4
-  scale:
-  - 2
-  - 2
-  name: epoch_10.ckpt
-training:
-  batch_size: 16
-  grad_clip: 0
-  instruments:
-  - Vocals
-  - Instrumental
-  lr: 5.0e-05
-  target_instrument: null
-  num_epochs: 100
-  num_steps: 1000
-inference:
-  batch_size: 1
-  dim_t: 256
-  num_overlap: 8

MDX23v24/model_2_stem_full_band_8k.yaml DELETED Viewed

@@ -1,43 +0,0 @@
-audio:
-  chunk_size: 261120
-  dim_f: 4096
-  dim_t: 256
-  hop_length: 1024
-  n_fft: 8192
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  act: gelu
-  bottleneck_factor: 4
-  growth: 128
-  norm: InstanceNorm
-  num_blocks_per_scale: 2
-  num_channels: 128
-  num_scales: 5
-  num_subbands: 4
-  scale:
-  - 2
-  - 2
-training:
-  batch_size: 6
-  grad_clip: 0
-  instruments:
-  - Vocals
-  - Instrumental
-  lr: 1.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: null
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: 1
-  augmentation_type: simple1
-  augmentation_mix: true
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-inference:
-  batch_size: 1
-  dim_t: 256
-  num_overlap: 8

MDX23v24/model_bs_roformer_ep_317_sdr_12.9755.yaml DELETED Viewed

@@ -1,133 +0,0 @@
-audio:
-  chunk_size: 352800
-  dim_f: 1024
-  dim_t: 801 # don't work (use in model)
-  hop_length: 441 # don't work (use in model)
-  n_fft: 2048
-  num_channels: 2
-  sample_rate: 44100
-  min_mean_abs: 0.001
-model:
-  dim: 512
-  depth: 12
-  stereo: true
-  num_stems: 1
-  time_transformer_depth: 1
-  freq_transformer_depth: 1
-  freqs_per_bands: !!python/tuple
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 2
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 4
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 12
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 24
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 48
-    - 128
-    - 129
-  dim_head: 64
-  heads: 8
-  attn_dropout: 0.1
-  ff_dropout: 0.1
-  flash_attn: true
-  dim_freqs_in: 1025
-  stft_n_fft: 2048
-  stft_hop_length: 441
-  stft_win_length: 2048
-  stft_normalized: false
-  mask_estimator_depth: 2
-  multi_stft_resolution_loss_weight: 1.0
-  multi_stft_resolutions_window_sizes: !!python/tuple
-  - 4096
-  - 2048
-  - 1024
-  - 512
-  - 256
-  multi_stft_hop_size: 147
-  multi_stft_normalized: False
-training:
-  batch_size: 16
-  gradient_accumulation_steps: 1
-  grad_clip: 0
-  instruments:
-  - Vocals
-  - Instrumental
-  lr: 5.0e-05
-  patience: 2
-  reduce_factor: 0.95
-  target_instrument: Vocals
-  num_epochs: 1000
-  num_steps: 1000
-  augmentation: false # enable augmentations by audiomentations and pedalboard
-  augmentation_type: simple1
-  use_mp3_compress: false # Deprecated
-  augmentation_mix: true # Mix several stems of the same type with some probability
-  augmentation_loudness: true # randomly change loudness of each stem
-  augmentation_loudness_type: 1 # Type 1 or 2
-  augmentation_loudness_min: 0.5
-  augmentation_loudness_max: 1.5
-  q: 0.95
-  coarse_loss_clip: true
-  ema_momentum: 0.999
-  optimizer: adam
-  other_fix: false # it's needed for checking on multisong dataset if other is actually instrumental
-  use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
-inference:
-  batch_size: 1
-  dim_t: 801
-  num_overlap: 4

MDX23v24/model_vocals_segm_models_sdr_9.77.ckpt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9cb6e969309f96602318fcf5970a6973899db86e5fd9d8f9cf8f15bacdd299bb
-size 863683537