File size: 3,459 Bytes
cb49d44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Parameters for AudioDataModule:
# ==============================================================================
AudioDataModule.num_workers = 20

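# Parameters for AudioDataset:
# ==============================================================================
# NOTE(review): orig_freq and new_freq are both 16000, so the two configured
# sample rates are identical. If num_frames counts samples at that rate,
# 480000 corresponds to 30 s of audio -- confirm against AudioDataset.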
AudioDataset.half_precision = True
AudioDataset.mono = True
AudioDataset.new_freq = 16000
AudioDataset.num_frames = 480000
AudioDataset.orig_freq = 16000

# Parameters for build_dev_datamodule:
# ==============================================================================
build_dev_datamodule.datamodule = @discotube

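# Parameters for build_module:
# ==============================================================================
# NOTE(review): ckpt_path is a relative path ('model.ckpt'); the directory it
# is resolved against is not visible in this file -- confirm in build_module.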
build_module.ckpt_path = 'model.ckpt'
build_module.module = @modules.maskingmodel.MaskingModel
build_module.net = @nets.conformer.Conformer
build_module.representation = @nets.melspectrogram.MelSpectrogram

# Parameters for Conformer:
# ==============================================================================
Conformer.alpha_deepnorm = 2.6321480259049848
Conformer.beta_deepnorm = 0.022386873579657126
Conformer.conv_kernel_size = 5
Conformer.depth = 24
Conformer.dropout = 0.2
Conformer.embed_dim = 1024
Conformer.input_dropout = 0.0
Conformer.mlp_ratio = 4.0
Conformer.mlp_residual_factor = 4.0
Conformer.num_heads = 8
Conformer.num_patches = 460
Conformer.use_deepnorm = True
Conformer.use_rope = True

# Parameters for CosineAnnealingCallback:
# ==============================================================================
CosineAnnealingCallback.eta_min = 1e-07
CosineAnnealingCallback.warmup_steps = 30000

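# Parameters for DiscotubeAudioDataModule:
# ==============================================================================
# NOTE(review): data_dir, filelist_train and filelist_val are empty strings in
# this file; presumably they have to be overridden with real paths before the
# data module can be used -- confirm against DiscotubeAudioDataModule.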
DiscotubeAudioDataModule.batch_size = 32
DiscotubeAudioDataModule.data_dir = ''
DiscotubeAudioDataModule.filelist_train = ''
DiscotubeAudioDataModule.filelist_val = ''

# Parameters for MaskingModel:
# ==============================================================================
MaskingModel.codebook_dim = 16
MaskingModel.codebook_size = 8192
MaskingModel.diff_input = False
MaskingModel.lr = 0.0001
MaskingModel.mask_prob = 0.6
MaskingModel.mask_seconds = 0.4
MaskingModel.num_codebooks = 4
MaskingModel.plot_tokens = False
MaskingModel.seed = 0
MaskingModel.weight_decay = 0.01

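# Parameters for MelSpectrogram:
# ==============================================================================
# NOTE(review): patch_size is bound last, outside the alphabetical order used
# by the other bindings in this section. Its first entry (96) equals n_mel;
# presumably each patch spans the full mel axis and 4 time frames -- confirm.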
MelSpectrogram.freq_mask_param = 0
MelSpectrogram.hop_len = 256
MelSpectrogram.mel_scale = 'slaney'
MelSpectrogram.n_mel = 96
MelSpectrogram.norm = 'slaney'
MelSpectrogram.norm_mean = 2.06755686098554
MelSpectrogram.norm_std = 1.268292820667291
MelSpectrogram.power = 2
MelSpectrogram.sr = 16000
MelSpectrogram.stretch_factor = 1
MelSpectrogram.time_mask_param = 0
MelSpectrogram.win_len = 512
MelSpectrogram.patch_size = (96, 4)

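# Parameters for train:
# ==============================================================================
# NOTE(review): wandb_params['save_dir'] is an absolute, site-specific path
# ('/gpfs/projects/upf97/logs/'); change it when running on another machine.
# wandb_params['offline'] is set to True in this file.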
train.params = \
    {'accelerator': 'gpu',
     'devices': 4,
     'log_every_n_steps': 50,
     'max_steps': 400000,
     'num_nodes': 1,
     'num_sanity_val_steps': 0,
     'precision': 'bf16-mixed',
     'strategy': 'ddp_find_unused_parameters_true'}
train.wandb_params = \
    {'entity': 'mtg-upf',
     'group': 'masking_conformer',
     'name': 'mask_conformer_rope_multi4_large',
     'offline': True,
     'project': 'mtg-ssl',
     'save_dir': '/gpfs/projects/upf97/logs/'}