File size: 2,209 Bytes
1482df5
 
 
 
 
 
 
 
 
 
7ec3fd2
1482df5
 
7ec3fd2
1482df5
 
7ec3fd2
1482df5
 
7ec3fd2
1482df5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Benchmark kotoba-whisper (faster-whisper build) on the kotoba-whisper-eval audio.
# Results are left in TIME_INTERVIEW / TIME_MANZAI1 / TIME_MANZAI2 / TIME_MANZAI3
# (elapsed whole seconds, taken from bash's SECONDS counter).

MODEL="kotoba-tech/kotoba-whisper-v1.0-faster"
DATASET_DIR="kotoba-whisper-eval"
AUDIO_DIR="${DATASET_DIR}/audio"

# clone dataset (skipped when the checkout already exists, so the script can be re-run)
[ -d "${DATASET_DIR}" ] || git clone https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval

# convert to 16 kHz mono 16-bit PCM wav
# -y overwrites a wav left by a previous run instead of prompting/aborting.
for name in long_interview_1 manzai1 manzai2 manzai3; do
  ffmpeg -y -i "${AUDIO_DIR}/${name}.mp3" -ar 16000 -ac 1 -c:a pcm_s16le "${AUDIO_DIR}/${name}.wav"
done

# cache the model: instantiate it once before any timed run
python -c 'import sys; from faster_whisper import WhisperModel; model = WhisperModel(sys.argv[1])' "${MODEL}"

# Transcribe one wav file ($1) and print the list of "[start -> end] text" segments.
# The model id and audio path are passed via sys.argv rather than pasted into the
# Python source, so paths never need quoting inside the code string.
# Each call starts a fresh Python process, so the measured time includes model loading.
transcribe() {
  python -c '
import sys
from faster_whisper import WhisperModel

model = WhisperModel(sys.argv[1])
segments, _info = model.transcribe(sys.argv[2], language="ja", chunk_length=15, condition_on_previous_text=False)
# segments is consumed here; the comprehension drives the actual decoding.
print(["[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text) for segment in segments])
' "${MODEL}" "$1"
}

# Timed runs: reset SECONDS, transcribe, then record the elapsed seconds.
SECONDS=0
transcribe "${AUDIO_DIR}/long_interview_1.wav"
TIME_INTERVIEW=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai1.wav"
TIME_MANZAI1=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai2.wav"
TIME_MANZAI2=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai3.wav"
TIME_MANZAI3=$SECONDS