# Benchmark: time kotoba-whisper-v1.0-faster (run through faster-whisper) on
# the four clips of the kotoba-whisper-eval dataset.
#
# Uses git, ffmpeg and a `python` that can import faster_whisper.
#
# Results (whole seconds, taken from bash's SECONDS counter):
#   TIME_INTERVIEW  - long_interview_1
#   TIME_MANZAI1    - manzai1
#   TIME_MANZAI2    - manzai2
#   TIME_MANZAI3    - manzai3
# Each figure covers a full `python` invocation, i.e. it includes interpreter
# start-up and model construction as well as the transcription itself.

kw_model="kotoba-tech/kotoba-whisper-v1.0-faster"
kw_audio_dir="kotoba-whisper-eval/audio"

# Python program run by kw_transcribe: argv[1] is the model id, argv[2] the
# audio path. Prints a list of "[start -> end] text" strings, one per segment.
# The lines start at column 0 on purpose: Python rejects an indented first
# statement.
kw_transcribe_py='
import sys
from faster_whisper import WhisperModel
model = WhisperModel(sys.argv[1])
result = model.transcribe(sys.argv[2], language="ja", chunk_length=15, condition_on_previous_text=False)
print(["[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text) for segment in result[0]])
'

#######################################
# Transcribe one audio file and print its segments.
# Globals:   kw_model (read), kw_transcribe_py (read)
# Arguments: $1 - path to the audio file
# Outputs:   segment list on stdout
# Returns:   exit status of python
#######################################
kw_transcribe() {
  python -c "$kw_transcribe_py" "$kw_model" "$1"
}

# Fetch the evaluation dataset.
git clone https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval

# Convert every clip to 16 kHz, mono, signed 16-bit little-endian PCM WAV.
for kw_clip in long_interview_1 manzai1 manzai2 manzai3; do
  ffmpeg -i "${kw_audio_dir}/${kw_clip}.mp3" -ar 16000 -ac 1 -c:a pcm_s16le "${kw_audio_dir}/${kw_clip}.wav"
done

# Construct the model once before any timing starts.
# NOTE(review): presumably this is a warm-up so that the one-time model
# download/cache fill is not counted in the first measurement - confirm.
python -c 'import sys; from faster_whisper import WhisperModel; model = WhisperModel(sys.argv[1])' "$kw_model"

# Timed runs. Assigning 0 to SECONDS restarts bash's elapsed-seconds counter,
# so reading it right after the command gives that command's duration.
SECONDS=0
kw_transcribe "${kw_audio_dir}/long_interview_1.wav"
TIME_INTERVIEW=$SECONDS

SECONDS=0
kw_transcribe "${kw_audio_dir}/manzai1.wav"
TIME_MANZAI1=$SECONDS

SECONDS=0
kw_transcribe "${kw_audio_dir}/manzai2.wav"
TIME_MANZAI2=$SECONDS

SECONDS=0
kw_transcribe "${kw_audio_dir}/manzai3.wav"
TIME_MANZAI3=$SECONDS