Create hyperparams.yaml
hyperparams.yaml  CHANGED  (+0, -49)
@@ -10,31 +10,6 @@

 save_folder: !ref librispeech-streaming-conformer-transducer

-# Training parameters
-# To make Transformers converge, the global batch size should be large enough.
-# The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
-# Empirically, we found that this value should be >= 128.
-# Please set your parameters accordingly.
-number_of_epochs: 50
-warmup_steps: 25000
-num_workers: 4
-batch_size_valid: 4
-lr: 0.0008
-weight_decay: 0.01
-number_of_ctc_epochs: 40
-ctc_weight: 0.3 # Multitask with CTC for the encoder (0.0 = disabled)
-ce_weight: 0.0 # Multitask with CE for the decoder (0.0 = disabled)
-max_grad_norm: 5.0
-loss_reduction: 'batchmean'
-precision: fp32 # bf16, fp16 or fp32
-
-# The batch size is used if and only if dynamic batching is set to False.
-# Validation and testing are done with fixed batches and not dynamic batching.
-batch_size: 8
-grad_accumulation_factor: 4
-sorting: ascending
-avg_checkpoints: 10 # Number of checkpoints to average for evaluation
-
 # Feature parameters
 sample_rate: 16000
 n_fft: 512
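The removed comments above spell out how the effective batch size is obtained: batch_size * n_gpus * grad_accumulation_factor, which should reach at least 128. A quick sanity check in Python with the values being removed here (n_gpus is an assumption, since the YAML does not set a GPU count):

    batch_size = 8                # from the removed YAML
    grad_accumulation_factor = 4  # from the removed YAML
    n_gpus = 4                    # hypothetical DDP world size, not part of the YAML

    global_batch_size = batch_size * n_gpus * grad_accumulation_factor
    print(global_batch_size)      # 128 -> meets the ">= 128" guideline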
@@ -44,22 +19,6 @@ win_length: 32
 # Streaming
 streaming: True # controls all Dynamic Chunk Training & chunk size & left context mechanisms

-# This setup works well for a 3090 24GB GPU; adapt it to your needs.
-# Adjust grad_accumulation_factor depending on the DDP node count (here 3),
-# or turn it off (but training speed will decrease).
-dynamic_batching: True
-max_batch_len: 250
-max_batch_len_val: 50 # we reduce it as the beam is much wider (VRAM)
-num_bucket: 200
-
-dynamic_batch_sampler:
-    max_batch_len: !ref <max_batch_len>
-    max_batch_len_val: !ref <max_batch_len_val>
-    num_buckets: !ref <num_bucket>
-    shuffle_ex: True # if true, batches are re-created at each epoch, shuffling examples
-    batch_ordering: random
-    max_batch_ex: 256
-
 # Model parameters
 # Transformer
 d_model: 512
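The removed block above configures duration-based dynamic batching: utterances are sorted into num_bucket length buckets, and each training batch is filled until it reaches max_batch_len (treated here as total seconds of audio per batch, which is how these recipes typically measure it), with max_batch_ex as a hard cap on the number of examples. A simplified, hedged sketch of the idea follows; it is not SpeechBrain's DynamicBatchSampler, only an illustration of what the knobs control:

    import random
    from collections import defaultdict

    def make_dynamic_batches(durations, max_batch_len=250, num_buckets=200, max_batch_ex=256):
        """Toy duration-based dynamic batching.
        durations: dict mapping utterance id -> duration in seconds."""
        bucket_width = max(durations.values()) / num_buckets
        buckets = defaultdict(list)
        for utt_id, dur in durations.items():
            buckets[int(dur // bucket_width)].append(utt_id)  # group similar lengths

        batches = []
        for bucket in buckets.values():
            batch, batch_len = [], 0.0
            for utt_id in bucket:
                dur = durations[utt_id]
                if batch and (batch_len + dur > max_batch_len or len(batch) >= max_batch_ex):
                    batches.append(batch)
                    batch, batch_len = [], 0.0
                batch.append(utt_id)
                batch_len += dur
            if batch:
                batches.append(batch)

        random.shuffle(batches)  # mirrors batch_ordering: random
        return batches

Bucketing keeps utterances of similar duration together (less padding waste), while the 250-second cap bounds peak memory, which is why the removed comment ties these values to a 24GB GPU.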
@@ -88,9 +47,6 @@ state_beam: 2.3
 expand_beam: 2.3
 lm_weight: 0.50

-epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
-    limit: !ref <number_of_epochs>
-
 normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 4
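The removed epoch_counter entry instantiates speechbrain.utils.epoch_loop.EpochCounter, which the training script iterates to drive the epoch loop and which is stored in checkpoints so interrupted runs resume at the right epoch. A minimal sketch, assuming that class and its limit argument:

    from speechbrain.utils.epoch_loop import EpochCounter

    # Equivalent of the removed YAML block: yields epoch indices up to `limit`.
    epoch_counter = EpochCounter(limit=50)  # limit: !ref <number_of_epochs>

    for epoch in epoch_counter:
        # one training epoch + validation + checkpoint saving would go here
        pass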
@@ -146,11 +102,6 @@ proj_dec: !new:speechbrain.nnet.linear.Linear
     n_neurons: !ref <joint_dim>
     bias: False

-# Uncomment for MTL with CTC
-ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
-    blank_index: !ref <blank_index>
-    reduction: !ref <loss_reduction>
-
 emb: !new:speechbrain.nnet.embedding.Embedding
     num_embeddings: !ref <output_neurons>
     consider_as_one_hot: True
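The ctc_cost entry removed here works together with ctc_weight, ce_weight, and number_of_ctc_epochs from the first hunk: during the first number_of_ctc_epochs epochs, an auxiliary CTC loss on the encoder (and optionally a CE loss on the decoder) is blended with the transducer loss. A hedged sketch of one common way to combine them; the scalar values are placeholders and this is not necessarily the recipe's exact formula:

    # Placeholder scalars stand in for loss tensors computed in the training step.
    transducer_loss, ctc_loss, ce_loss = 1.2, 2.5, 0.0  # hypothetical values
    epoch = 10

    ctc_weight = 0.3             # from the YAML (0.0 disables the CTC branch)
    ce_weight = 0.0              # from the YAML (0.0 disables the CE branch)
    number_of_ctc_epochs = 40

    if epoch <= number_of_ctc_epochs and (ctc_weight > 0.0 or ce_weight > 0.0):
        loss = (ctc_weight * ctc_loss
                + ce_weight * ce_loss
                + (1.0 - ctc_weight - ce_weight) * transducer_loss)
    else:
        loss = transducer_loss

    print(loss)  # 0.3 * 2.5 + 0.7 * 1.2 = 1.59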