File size: 2,209 Bytes
1482df5 7ec3fd2 1482df5 7ec3fd2 1482df5 7ec3fd2 1482df5 7ec3fd2 1482df5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# Benchmark the kotoba-whisper-v1.0-faster model (via faster-whisper) on the
# kotoba-whisper-eval audio files.
#
# Results: wall-clock seconds per file are stored in TIME_INTERVIEW,
# TIME_MANZAI1, TIME_MANZAI2 and TIME_MANZAI3 and printed at the end.
# Each timing covers one fresh python process (interpreter start-up, model
# load and transcription), exactly as one standalone invocation would cost.
#
# The script uses neither `set -e` nor `exit`, so it can be executed or
# sourced without killing the calling shell on failure.

DATASET_URL="https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval"
DATASET_DIR="kotoba-whisper-eval"
AUDIO_DIR="${DATASET_DIR}/audio"
MODEL_ID="kotoba-tech/kotoba-whisper-v1.0-faster"

# Convert ${AUDIO_DIR}/$1.mp3 to a 16 kHz, mono, signed 16-bit PCM wav file
# stored next to it.
# -y overwrites an existing wav so a re-run does not stop at ffmpeg's
# interactive "Overwrite? [y/N]" prompt.
convert_to_wav() {
  ffmpeg -y -i "${AUDIO_DIR}/$1.mp3" -ar 16000 -ac 1 -c:a pcm_s16le "${AUDIO_DIR}/$1.wav"
}

# Transcribe ${AUDIO_DIR}/$1.wav and print the list of
# "[start -> end] text" segments.
# The model id and the audio path are handed to python through sys.argv so a
# single copy of the one-liner serves every file.
transcribe() {
  python -c 'import sys; from faster_whisper import WhisperModel; model = WhisperModel(sys.argv[1]); print(["[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text) for segment in model.transcribe(sys.argv[2], language="ja", chunk_length=15, condition_on_previous_text=False)[0]])' "${MODEL_ID}" "${AUDIO_DIR}/$1.wav" || {
    echo "error: transcription of $1 failed; its timing is not meaningful" >&2
    return 1
  }
}

# Fetch the dataset, convert the audio and warm the model cache.
# Stops at the first failing step: timings taken after a failed setup would
# be meaningless.
prepare_benchmark() {
  local name

  # clone dataset (skipped when already present: git refuses to clone into an
  # existing non-empty directory, which used to break every re-run)
  if [ ! -d "${DATASET_DIR}" ]; then
    git clone "${DATASET_URL}" "${DATASET_DIR}" || return 1
  fi

  # convert to 16khz
  for name in long_interview_1 manzai1 manzai2 manzai3; do
    convert_to_wav "${name}" || return 1
  done

  # cache the model (instantiating it once up front keeps the one-off
  # download out of the timed runs below)
  python -c 'import sys; from faster_whisper import WhisperModel; model = WhisperModel(sys.argv[1])' "${MODEL_ID}" || return 1
}

# Time each transcription with bash's SECONDS counter (whole seconds since it
# was last reset) and report the results. A failed transcription is reported
# on stderr but does not prevent the remaining files from being measured.
# Returns non-zero if any transcription failed.
run_benchmarks() {
  local failed=0

  SECONDS=0
  transcribe long_interview_1 || failed=1
  TIME_INTERVIEW=$SECONDS

  SECONDS=0
  transcribe manzai1 || failed=1
  TIME_MANZAI1=$SECONDS

  SECONDS=0
  transcribe manzai2 || failed=1
  TIME_MANZAI2=$SECONDS

  SECONDS=0
  transcribe manzai3 || failed=1
  TIME_MANZAI3=$SECONDS

  # Without this summary the measurements are lost when the script is run as
  # a child process instead of being sourced.
  printf 'TIME_INTERVIEW=%s\n' "${TIME_INTERVIEW}"
  printf 'TIME_MANZAI1=%s\n' "${TIME_MANZAI1}"
  printf 'TIME_MANZAI2=%s\n' "${TIME_MANZAI2}"
  printf 'TIME_MANZAI3=%s\n' "${TIME_MANZAI3}"

  return "${failed}"
}

prepare_benchmark && run_benchmarks
|