# Parameters for AudioDataModule:
# ==============================================================================
AudioDataModule.num_workers = 20
# Parameters for AudioDataset:
# ==============================================================================
AudioDataset.half_precision = True
AudioDataset.mono = True
# orig_freq and new_freq are equal (16000), so presumably no resampling is
# needed -- TODO confirm against AudioDataset.
AudioDataset.new_freq = 16000
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000
# Parameters for build_dev_datamodule:
# ==============================================================================
# `@discotube` is a Gin configurable reference; it must resolve to a
# configurable registered under that name.
build_dev_datamodule.datamodule = @discotube
# Parameters for build_module:
# ==============================================================================
# Relative path -- presumably resolved against the working directory; verify
# against build_module.
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = @nets.melspectrogram.MelSpectrogram
# Parameters for Conformer:
# ==============================================================================
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = 460
Conformer.use_deepnorm = True
Conformer.use_rope = True
# Parameters for CosineAnnealingCallback:
# ==============================================================================
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000
# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
DiscotubeAudioDataModule.batch_size = 32
# NOTE(review): the three paths below are empty strings -- presumably
# placeholders to be filled in before use; confirm.
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''
# Parameters for MaskingModel:
# ==============================================================================
MaskingModel.codebook_dim = 16
MaskingModel.codebook_size = 8192
MaskingModel.diff_input = False
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 4
MaskingModel.plot_tokens = False
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01
# Parameters for MelSpectrogram:
# ==============================================================================
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 256
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.power = 2
# Matches AudioDataset.new_freq (16000) elsewhere in this config.
MelSpectrogram.sr = 16000
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
# First element equals n_mel (96); presumably (mel bins, time frames) per
# patch -- TODO confirm against MelSpectrogram.
MelSpectrogram.patch_size = (96, 4)
# Parameters for train:
# ==============================================================================
# The key names (accelerator, devices, precision, strategy, ...) look like
# PyTorch Lightning Trainer arguments -- presumably forwarded to the Trainer;
# verify against train().
train.params = \
    {'accelerator': 'gpu',
     'devices': 4,
     'log_every_n_steps': 50,
     'max_steps': 400000,
     'num_nodes': 1,
     'num_sanity_val_steps': 0,
     'precision': 'bf16-mixed',
     'strategy': 'ddp_find_unused_parameters_true'}
# NOTE(review): 'save_dir' is an absolute, cluster-specific path and 'offline'
# is True; adjust both when running in a different environment.
train.wandb_params = \
    {'entity': 'mtg-upf',
     'group': 'masking_conformer',
     'name': 'mask_conformer_rope_multi4_large',
     'offline': True,
     'project': 'mtg-ssl',
     'save_dir': '/gpfs/projects/upf97/logs/'}