Automatic Speech Recognition
Transformers
PyTorch
TensorBoard
whisper
whisper-event
Generated from Trainer
hf-asr-leaderboard
Eval Results (legacy)
Instructions to use arbml/whisper-small-ar with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use arbml/whisper-small-ar with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="arbml/whisper-small-ar")

# Load model directly
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq

processor = AutoProcessor.from_pretrained("arbml/whisper-small-ar")
model = AutoModelForSpeechSeq2Seq.from_pretrained("arbml/whisper-small-ar")
```
- Notebooks
- Google Colab
- Kaggle
"""Split the recordings of an extracted mgb2 archive into per-segment clips.

For every XML segments file under ``<archive>/xml/utf8`` the script looks up
the recording's wav file, cuts out each segment by its start/end time and
writes one ``.wav`` plus one ``.txt`` transcript per segment under
``<archive>/dataset/<split>/``.
"""
import xml.etree.ElementTree as ET
from pathlib import Path

import soundfile as sf

split = "train"  # or "dev"

# Used both to turn the segment times into frame offsets and as the sample
# rate of the written clips.
# NOTE(review): assumes the source wavs are 16 kHz and that the
# starttime/endtime attributes are expressed in seconds -- confirm.
SAMPLE_RATE = 16_000

# set the following path to where you
# extracted the mgb2 archive
archive_path = Path("data/train")
wav_dir = archive_path / "wav"
segments_file = archive_path / "xml" / "utf8"

# output directories
output_wav_dir = archive_path / "dataset" / split / "wav"
output_txt_dir = archive_path / "dataset" / split / "txt"

# create directories for output datasets
output_wav_dir.mkdir(parents=True, exist_ok=True)
output_txt_dir.mkdir(parents=True, exist_ok=True)

# for all xml segments files under utf8 directory from archive
for s_file in segments_file.glob("*.xml"):
    tree = ET.parse(str(s_file))
    root = tree.getroot()
    head = root[0]
    segments = root[1][0]

    # get the name of the wav file from the recording tag; reset it for every
    # xml file so a file without a recording tag can never silently reuse the
    # name found in the previous one
    file_name = None
    for child in head:
        if child.tag == "recording":
            print(child.attrib)
            file_name = child.attrib.get("filename")
    if file_name is None:
        print(f"skipping {s_file}: no recording tag with a filename attribute")
        continue

    wav_path = wav_dir / f"{file_name}.wav"

    # get the start and end times from the segment under segments tag
    # and join the text from each segment to construct the transcript
    for segment in segments:
        start_time = int(float(segment.attrib.get("starttime")) * SAMPLE_RATE)
        end_time = int(float(segment.attrib.get("endtime")) * SAMPLE_RATE)
        # elements without text have ``.text is None``; leave them out of the
        # transcript instead of letting ``str.join`` raise a TypeError
        text = " ".join(x.text for x in segment if x.text)

        # now store the meta data and the correctly sampled wav file in the
        # correct output directories
        clip_name = f"{file_name}_seg{start_time}_{end_time}"
        sound, _ = sf.read(wav_path, start=start_time, stop=end_time)
        sf.write(output_wav_dir / f"{clip_name}.wav", sound, SAMPLE_RATE)
        # explicit UTF-8 so the transcripts do not depend on the platform's
        # default encoding; write_text also closes the file handle
        (output_txt_dir / f"{clip_name}.txt").write_text(text, encoding="utf-8")