pere committed on
Commit
66c89b8
·
1 Parent(s): e299e7f
Files changed (2) hide show
  1. distil-whisper +1 -0
  2. run_large_training.sh +5 -5
distil-whisper ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit a36e6353d80218e98b78eddb75455babc84f080d
run_large_training.sh CHANGED
@@ -3,11 +3,11 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation_nodes.py \
3
  --model_name_or_path "./nb-distil-large-init" \
4
  --teacher_model_name_or_path "NbAiLab/nb-whisper-large" \
5
  --train_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
6
- --train_dataset_config_name "no" \
7
  --train_split_name "train" \
8
  --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
9
- --eval_dataset_config_name "no" \
10
- --eval_split_name "validation_norwegian_fleurs" \
11
  --eval_steps 500 \
12
  --save_steps 1000 \
13
  --warmup_steps 1000 \
@@ -17,8 +17,8 @@ TOKENIZERS_PARALLELISM=false python3 run_distillation_nodes.py \
17
  --save_total_limit 1 \
18
  --max_steps 100000 \
19
  --wer_threshold 10 \
20
- --per_device_train_batch_size 32\
21
- --per_device_eval_batch_size 32 \
22
  --dataloader_num_workers 32 \
23
  --dtype "bfloat16" \
24
  --output_dir "./" \
 
3
  --model_name_or_path "./nb-distil-large-init" \
4
  --teacher_model_name_or_path "NbAiLab/nb-whisper-large" \
5
  --train_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
6
+ --train_dataset_config_name "" \
7
  --train_split_name "train" \
8
  --eval_dataset_name "NbAiLab/annotated_distil_raw_ncc_speech_v7_large" \
9
+ --eval_dataset_config_name "" \
10
+ --eval_split_name "validation_norwegian" \
11
  --eval_steps 500 \
12
  --save_steps 1000 \
13
  --warmup_steps 1000 \
 
17
  --save_total_limit 1 \
18
  --max_steps 100000 \
19
  --wer_threshold 10 \
20
+ --per_device_train_batch_size 16\
21
+ --per_device_eval_batch_size 16 \
22
  --dataloader_num_workers 32 \
23
  --dtype "bfloat16" \
24
  --output_dir "./" \