# omar-rq-multicodebook / config.gin
# Hosting-page residue kept for provenance (not configuration):
#   "p-alonso's picture" / "Upload folder using huggingface_hub" / "56ef1b1 verified"
# Parameters for AudioDataModule:
# ==============================================================================
# Number of workers handed to the data module (presumably data-loading worker
# processes — confirm against the AudioDataModule implementation).
AudioDataModule.num_workers = 20
# Parameters for AudioDataset:
# ==============================================================================
# orig_freq and new_freq are both 16000, so if they drive a resampler it would
# be a no-op — TODO confirm against AudioDataset.
# NOTE(review): if num_frames counts audio samples at 16000 Hz, 480000 is 30 s
# per example — confirm the unit.
AudioDataset.half_precision = True
AudioDataset.mono = True
AudioDataset.new_freq = 16000
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000
# Parameters for build_dev_datamodule:
# ==============================================================================
# `@discotube` is a gin reference to a configurable registered under that name;
# its registration is not in this file (presumably it maps to
# DiscotubeAudioDataModule configured in this file — verify in the code).
build_dev_datamodule.datamodule = @discotube
# Parameters for build_module:
# ==============================================================================
# Selects the three components by gin reference; each one has its own
# "Parameters for ..." section in this file (MaskingModel, Conformer,
# MelSpectrogram).
# ckpt_path is a relative path; what it is resolved against is decided by the
# caller — TODO confirm.
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = @nets.melspectrogram.MelSpectrogram
# Parameters for Conformer:
# ==============================================================================
# alpha_deepnorm is numerically (2 * depth) ** 0.25 for depth = 24; if depth is
# changed, this constant (and presumably beta_deepnorm) likely needs updating
# too — confirm how the network consumes them.
# embed_dim / num_heads = 1024 / 8 = 128 per head, assuming heads split the
# embedding evenly — TODO confirm.
# NOTE(review): num_patches presumably has to agree with the input length and
# MelSpectrogram.patch_size — verify against the Conformer implementation.
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = 460
Conformer.use_deepnorm = True
Conformer.use_rope = True
# Parameters for CosineAnnealingCallback:
# ==============================================================================
# warmup_steps is 30000 of the 400000 'max_steps' set in train.params.
# eta_min is presumably the floor the learning rate anneals to — confirm
# against the callback.
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000
# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
# data_dir and both file lists are empty strings here; presumably they must be
# set to real paths before this config can be used for training — TODO confirm.
DiscotubeAudioDataModule.batch_size = 32
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''
# Parameters for MaskingModel:
# ==============================================================================
# Configures 4 codebooks with codebook_size 8192 and codebook_dim 16.
# NOTE(review): mask_prob / mask_seconds presumably set the masking probability
# and the duration (in seconds) of each masked span — confirm against
# MaskingModel.
MaskingModel.codebook_dim = 16
MaskingModel.codebook_size = 8192
MaskingModel.diff_input = False
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 4
MaskingModel.plot_tokens = False
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01
# Parameters for MelSpectrogram:
# ==============================================================================
# sr matches AudioDataset.new_freq (16000); the first element of patch_size
# equals n_mel (96).
# freq_mask_param = 0, time_mask_param = 0 and stretch_factor = 1 presumably
# leave those augmentations disabled — TODO confirm.
# norm_mean / norm_std are presumably dataset statistics used to standardise
# the spectrogram — verify against MelSpectrogram.
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 256
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.power = 2
MelSpectrogram.sr = 16000
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
MelSpectrogram.patch_size = (96, 4)
# Parameters for train:
# ==============================================================================
# Keyword dict bound to train.params (the keys look like PyTorch Lightning
# Trainer arguments — confirm in train()). Requests 4 GPU devices on 1 node.
train.params = \
    {'accelerator': 'gpu',
     'devices': 4,
     'log_every_n_steps': 50,
     'max_steps': 400000,
     'num_nodes': 1,
     'num_sanity_val_steps': 0,
     'precision': 'bf16-mixed',
     'strategy': 'ddp_find_unused_parameters_true'}
# Logger settings bound to train.wandb_params. 'offline' is True, and
# 'save_dir' is an absolute, site-specific path that must exist on the machine
# running the job.
train.wandb_params = \
    {'entity': 'mtg-upf',
     'group': 'masking_conformer',
     'name': 'mask_conformer_rope_multi4_large',
     'offline': True,
     'project': 'mtg-ssl',
     'save_dir': '/gpfs/projects/upf97/logs/'}