nvidia
/

canary-1b-v2

@@ -35,6 +35,639 @@ metrics:
 - comet
 pipeline_tag: automatic-speech-recognition
 library_name: nemo
 ---
 ## <span style="color:#ffb300;">🐤 Canary 1B v2: Multitask Speech Transcription and Translation Model </span>

 - comet
 pipeline_tag: automatic-speech-recognition
 library_name: nemo
+# Tags attached to this model card's metadata (order and spelling kept as-is).
+tags:
+  - automatic-speech-recognition
+  - automatic-speech-translation
+  - speech
+  - audio
+  - Transformer
+  - FastConformer
+  - Conformer
+  - pytorch
+  - NeMo
+  - hf-asr-leaderboard
+model-index:
+  - name: canary-1b-v2
+    results:
+      # FLEURS ASR Results
+      # FLEURS bg_bg (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: bg_bg
+          split: test
+          args:
+            language: bg
+        metrics:
+          - name: Test WER (Bg)
+            type: wer
+            value: 9.25
+      # FLEURS cs_cz (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: cs_cz
+          split: test
+          args:
+            language: cs
+        metrics:
+          - name: Test WER (Cs)
+            type: wer
+            value: 7.86
+      # FLEURS da_dk (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: da_dk
+          split: test
+          args:
+            language: da
+        metrics:
+          - name: Test WER (Da)
+            type: wer
+            value: 11.25
+      # FLEURS de_de (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: de_de
+          split: test
+          args:
+            language: de
+        metrics:
+          - name: Test WER (De)
+            type: wer
+            value: 4.40
+      # FLEURS el_gr (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: el_gr
+          split: test
+          args:
+            language: el
+        metrics:
+          - name: Test WER (El)
+            type: wer
+            value: 9.21
+      # FLEURS en_us (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: en_us
+          split: test
+          args:
+            language: en
+        metrics:
+          - name: Test WER (En)
+            type: wer
+            value: 4.50
+      # FLEURS es_419 (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: es_419
+          split: test
+          args:
+            language: es
+        metrics:
+          - name: Test WER (Es)
+            type: wer
+            value: 2.90
+      # FLEURS et_ee (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: et_ee
+          split: test
+          args:
+            language: et
+        metrics:
+          - name: Test WER (Et)
+            type: wer
+            value: 12.55
+      # FLEURS fi_fi (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: fi_fi
+          split: test
+          args:
+            language: fi
+        metrics:
+          - name: Test WER (Fi)
+            type: wer
+            value: 8.59
+      # FLEURS fr_fr (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: fr_fr
+          split: test
+          args:
+            language: fr
+        metrics:
+          - name: Test WER (Fr)
+            type: wer
+            value: 5.02
+      # FLEURS hr_hr (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: hr_hr
+          split: test
+          args:
+            language: hr
+        metrics:
+          - name: Test WER (Hr)
+            type: wer
+            value: 8.29
+      # FLEURS hu_hu (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: hu_hu
+          split: test
+          args:
+            language: hu
+        metrics:
+          - name: Test WER (Hu)
+            type: wer
+            value: 12.90
+      # FLEURS it_it (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: it_it
+          split: test
+          args:
+            language: it
+        metrics:
+          - name: Test WER (It)
+            type: wer
+            value: 3.07
+      # FLEURS lt_lt (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: lt_lt
+          split: test
+          args:
+            language: lt
+        metrics:
+          - name: Test WER (Lt)
+            type: wer
+            value: 12.36
+      # FLEURS lv_lv (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: lv_lv
+          split: test
+          args:
+            language: lv
+        metrics:
+          - name: Test WER (Lv)
+            type: wer
+            value: 9.66
+      # FLEURS mt_mt (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: mt_mt
+          split: test
+          args:
+            language: mt
+        metrics:
+          - name: Test WER (Mt)
+            type: wer
+            value: 18.31
+      # FLEURS nl_nl (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: nl_nl
+          split: test
+          args:
+            language: nl
+        metrics:
+          - name: Test WER (Nl)
+            type: wer
+            value: 6.12
+      # FLEURS pl_pl (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: pl_pl
+          split: test
+          args:
+            language: pl
+        metrics:
+          - name: Test WER (Pl)
+            type: wer
+            value: 6.64
+      # FLEURS pt_br (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: pt_br
+          split: test
+          args:
+            language: pt
+        metrics:
+          - name: Test WER (Pt)
+            type: wer
+            value: 4.39
+      # FLEURS ro_ro (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: ro_ro
+          split: test
+          args:
+            language: ro
+        metrics:
+          - name: Test WER (Ro)
+            type: wer
+            value: 6.61
+      # FLEURS ru_ru (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: ru_ru
+          split: test
+          args:
+            language: ru
+        metrics:
+          - name: Test WER (Ru)
+            type: wer
+            value: 6.90
+      # FLEURS sk_sk (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: sk_sk
+          split: test
+          args:
+            language: sk
+        metrics:
+          - name: Test WER (Sk)
+            type: wer
+            value: 5.74
+      # FLEURS sl_si (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: sl_si
+          split: test
+          args:
+            language: sl
+        metrics:
+          - name: Test WER (Sl)
+            type: wer
+            value: 13.32
+      # FLEURS sv_se (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: sv_se
+          split: test
+          args:
+            language: sv
+        metrics:
+          - name: Test WER (Sv)
+            type: wer
+            value: 9.57
+      # FLEURS uk_ua (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: FLEURS
+          type: google/fleurs
+          config: uk_ua
+          split: test
+          args:
+            language: uk
+        metrics:
+          - name: Test WER (Uk)
+            type: wer
+            value: 10.50
+      # Multilingual LibriSpeech ASR Results
+      # MLS spanish (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: spanish
+          split: test
+          args:
+            language: es
+        metrics:
+          - name: Test WER (Es)
+            type: wer
+            value: 2.94
+      # MLS french (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: french
+          split: test
+          args:
+            language: fr
+        metrics:
+          - name: Test WER (Fr)
+            type: wer
+            value: 3.36
+      # MLS italian (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: italian
+          split: test
+          args:
+            language: it
+        metrics:
+          - name: Test WER (It)
+            type: wer
+            value: 9.16
+      # MLS dutch (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: dutch
+          split: test
+          args:
+            language: nl
+        metrics:
+          - name: Test WER (Nl)
+            type: wer
+            value: 11.27
+      # MLS polish (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: polish
+          split: test
+          args:
+            language: pl
+        metrics:
+          - name: Test WER (Pl)
+            type: wer
+            value: 8.77
+      # MLS portuguese (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: Multilingual LibriSpeech
+          type: facebook/multilingual_librispeech
+          config: portuguese
+          split: test
+          args:
+            language: pt
+        metrics:
+          - name: Test WER (Pt)
+            type: wer
+            value: 8.14
+      # CoVoST2 ASR Results
+      # CoVoST2 de (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: de
+          split: test
+          args:
+            language: de
+        metrics:
+          - name: Test WER (De)
+            type: wer
+            value: 5.53
+      # CoVoST2 en (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: en
+          split: test
+          args:
+            language: en
+        metrics:
+          - name: Test WER (En)
+            type: wer
+            value: 6.85
+      # CoVoST2 es (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: es
+          split: test
+          args:
+            language: es
+        metrics:
+          - name: Test WER (Es)
+            type: wer
+            value: 3.81
+      # CoVoST2 et (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: et
+          split: test
+          args:
+            language: et
+        metrics:
+          - name: Test WER (Et)
+            type: wer
+            value: 18.28
+      # CoVoST2 fr (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: fr
+          split: test
+          args:
+            language: fr
+        metrics:
+          - name: Test WER (Fr)
+            type: wer
+            value: 6.30
+      # CoVoST2 it (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: it
+          split: test
+          args:
+            language: it
+        metrics:
+          - name: Test WER (It)
+            type: wer
+            value: 4.80
+      # CoVoST2 lv (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: lv
+          split: test
+          args:
+            language: lv
+        metrics:
+          - name: Test WER (Lv)
+            type: wer
+            value: 11.49
+      # CoVoST2 nl (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: nl
+          split: test
+          args:
+            language: nl
+        metrics:
+          - name: Test WER (Nl)
+            type: wer
+            value: 6.93
+      # CoVoST2 pt (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: pt
+          split: test
+          args:
+            language: pt
+        metrics:
+          - name: Test WER (Pt)
+            type: wer
+            value: 6.87
+      # CoVoST2 ru (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: ru
+          split: test
+          args:
+            language: ru
+        metrics:
+          - name: Test WER (Ru)
+            type: wer
+            value: 5.14
+      # CoVoST2 sl (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: sl
+          split: test
+          args:
+            language: sl
+        metrics:
+          - name: Test WER (Sl)
+            type: wer
+            value: 7.59
+      # CoVoST2 sv (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: sv
+          split: test
+          args:
+            language: sv
+        metrics:
+          - name: Test WER (Sv)
+            type: wer
+            value: 13.32
+      # CoVoST2 uk (test) WER; task `type` is the task id, `name` its label.
+      - task:
+          type: automatic-speech-recognition
+          name: Automatic Speech Recognition
+        dataset:
+          name: CoVoST2
+          type: covost2
+          config: uk
+          split: test
+          args:
+            language: uk
+        metrics:
+          - name: Test WER (Uk)
+            type: wer
+            value: 18.15
 ---
 ## <span style="color:#ffb300;">🐤 Canary 1B v2: Multitask Speech Transcription and Translation Model </span>