| generator: | |
| name: SoundStream | |
| config: | |
| n_filters: 32 | |
| D: 256 | |
| target_bandwidths: | |
| - 0.5 | |
| - 1 | |
| - 1.5 | |
| - 2 | |
| - 4 | |
| - 6 | |
| ratios: | |
| - 8 | |
| - 5 | |
| - 4 | |
| - 2 | |
| sample_rate: 16000 | |
| bins: 1024 | |
| d_list: | |
| - mfd | |
| mfd: | |
| name: MultiFrequencyDiscriminator | |
| config: | |
| hop_lengths: | |
| - 32 | |
| - 64 | |
| - 128 | |
| - 256 | |
| - 512 | |
| - 1024 | |
| hidden_channels: | |
| - 64 | |
| - 128 | |
| - 256 | |
| - 512 | |
| - 512 | |
| - 512 | |
| domain: double | |
| mel_scale: true | |
| sample_rate: 16000 | |
| mpd: | |
| name: MultiPeriodDiscriminator | |
| config: | |
| period_sizes: | |
| - 2 | |
| - 3 | |
| - 5 | |
| - 7 | |
| - 11 | |
| period_kernel_size: 5 | |
| msd: | |
| name: MultiScaleDiscriminator | |
| config: | |
| num_scales: 3 | |
| pool_kernel_size: 4 | |
| pool_stride: 2 | |
| optimizer: | |
| g: | |
| name: AdamW | |
| config: | |
| lr: 0.0002 | |
| betas: | |
| - 0.8 | |
| - 0.99 | |
| eps: 1.0e-06 | |
| d: | |
| name: AdamW | |
| config: | |
| lr: 0.0002 | |
| betas: | |
| - 0.8 | |
| - 0.99 | |
| eps: 1.0e-06 | |
| lr_scheduler: | |
| g: | |
| name: ExponentialLR | |
| config: | |
| gamma: 0.999 | |
| d: | |
| name: ExponentialLR | |
| config: | |
| gamma: 0.999 | |
| criterion: | |
| g_criterion: | |
| name: losses.generator_loss.GeneratorSTFTLoss | |
| config: | |
| use_mel_loss: false | |
| adv_criterion: MSEGLoss | |
| mel_loss_weight: 45 | |
| use_feature_match: true | |
| feat_match_loss_weight: 20 | |
| use_full_stft_loss: true | |
| use_sub_stft_loss: true | |
| full_stft_loss_weight: 1 | |
| sub_stft_loss_weight: 1 | |
| mel_scale_loss: | |
| sampling_rate: 16000 | |
| n_fft: 1024 | |
| num_mels: 80 | |
| hop_size: 160 | |
| win_size: 800 | |
| fmin: 0 | |
| full_multi_scale_stft_loss: | |
| fft_sizes: | |
| - 512 | |
| - 1024 | |
| - 2048 | |
| win_sizes: | |
| - 480 | |
| - 960 | |
| - 1200 | |
| hop_sizes: | |
| - 120 | |
| - 240 | |
| - 300 | |
| sub_multi_scale_stft_loss: | |
| num_bands: 6 | |
| fft_sizes: | |
| - 128 | |
| - 256 | |
| - 256 | |
| win_sizes: | |
| - 80 | |
| - 120 | |
| - 200 | |
| hop_sizes: | |
| - 20 | |
| - 40 | |
| - 50 | |
| d_criterion: | |
| name: losses.discriminator_loss.MSEDiscriminatorLoss | |
| config: null | |
| commit_loss_weight: 1.0 | |
| codebook_loss_weight: 75 | |
| training_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/train.txt | |
| validation_file: /aifs4su/data/zheny/fairseq/vae_v2/codec_final/list/valid.txt | |
| seed: 2333 | |
| cudnn_deterministic: false | |
| tensorboard: true | |
| checkpoint_interval: 5000 | |
| summary_interval: 100 | |
| validation_interval: 500 | |
| num_epoches: 20 | |
| print_freq: 10 | |
| discriminator_iter_start: 0 | |
| num_ckpt_keep: 10 | |
| segment_size: 16000 | |
| audio_norm_scale: 0.95 | |
| batch_size: 48 | |
| num_workers: 8 | |
| num_plots: 8 | |
| local_rank: 1000000 | |
| basic_model_config: config/codec_16k_6kbps_v3_vqdp.yaml | |
| exp_model_config: null | |
| log_dir: 0518_20w_ckpts | |
| ngpus_per_node: 8 | |
| sample_rate: 16000 | |
| model_ckpt_dir: 0518_20w_ckpts/model_ckpts | |