bweng committed
Commit c70b7e2 · verified · 1 Parent(s): dccc982

Update README.md

Files changed (1): README.md (+155 -0)
README.md CHANGED
@@ -125,4 +125,159 @@ The provided OpenVINO™ IR model is compatible with:
 
 ```bash
 optimum-cli export openvino --trust-remote-code --model openai/whisper-large-v3-turbo --weight-format int8 --disable-stateful whisper-large-v3-turbo-int8-ov
+```
+
+
+```python
+#!/usr/bin/env python3
+import time
+import requests
+import openvino_genai
+import librosa
+from pathlib import Path
+from huggingface_hub import snapshot_download
+
+
+def download_model(model_id="FluidInference/whisper-large-v3-turbo-int8-ov-npu"):
+    """Download model from HuggingFace Hub"""
+    local_dir = Path("models") / model_id.split("/")[-1]
+
+    if local_dir.exists() and any(local_dir.iterdir()):
+        return str(local_dir)
+
+    print(f"Downloading model...")
+    snapshot_download(
+        repo_id=model_id,
+        local_dir=str(local_dir),
+        local_dir_use_symlinks=False
+    )
+    return str(local_dir)
+
+
+def download_hf_audio_samples():
+    """Download audio samples from Hugging Face"""
+    samples_dir = Path("sample_audios")
+    samples_dir.mkdir(exist_ok=True)
+
+    downloaded = []
+    whisper_samples = [
+        ("https://cdn-media.huggingface.co/speech_samples/sample1.flac", "sample1.flac"),
+        ("https://cdn-media.huggingface.co/speech_samples/sample2.flac", "sample2.flac"),
+    ]
+
+    for url, filename in whisper_samples:
+        filepath = samples_dir / filename
+        if filepath.exists():
+            downloaded.append(str(filepath))
+            continue
+
+        try:
+            response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
+            response.raise_for_status()
+
+            with open(filepath, 'wb') as f:
+                f.write(response.content)
+
+            downloaded.append(str(filepath))
+        except Exception as e:
+            print(f"Error downloading {filename}: {e}")
+
+    return downloaded
+
+
+def read_audio(filepath):
+    """Read audio file and convert to 16kHz"""
+    try:
+        raw_speech, _ = librosa.load(filepath, sr=16000)
+        return raw_speech.tolist()
+    except Exception as e:
+        print(f"Error reading {filepath}: {e}")
+        return None
+
+
+def test_whisper_on_file(pipe, filepath):
+    """Test Whisper on a single audio file"""
+    config = pipe.get_generation_config()
+    config.language = "<|en|>"
+    config.task = "transcribe"
+    config.return_timestamps = True
+    config.max_new_tokens = 448
+
+    raw_speech = read_audio(filepath)
+    if raw_speech is None:
+        return None
+
+    duration = len(raw_speech) / 16000
+
+    start_time = time.time()
+    result = pipe.generate(raw_speech, config)
+    inference_time = time.time() - start_time
+
+    return {
+        "file": filepath,
+        "duration": duration,
+        "inference_time": inference_time,
+        "rtf": inference_time/duration,
+        "transcription": str(result)
+    }
+
+
+def main():
+    # Download model
+    model_path = download_model()
+
+    # Initialize pipeline on NPU
+    print(f"\nInitializing NPU...")
+    start_time = time.time()
+    pipe = openvino_genai.WhisperPipeline(model_path, "NPU")
+    init_time = time.time() - start_time
+
+    results = []
+
+    # Collect test files
+    test_files = []
+    test_files.extend(Path(".").glob("*.wav"))
+
+    if Path("samples/c/whisper_speech_recognition").exists():
+        test_files.extend(Path("samples/c/whisper_speech_recognition").glob("*.wav"))
+
+    # Download HF samples
+    hf_samples = download_hf_audio_samples()
+    test_files.extend([Path(f) for f in hf_samples])
+
+    # Test all files
+    print(f"\nTesting {len(test_files)} files...")
+    for audio_file in test_files:
+        result = test_whisper_on_file(pipe, str(audio_file))
+        if result:
+            results.append(result)
+            print(f"[OK] {Path(result['file']).name}: RTF={result['rtf']:.2f}x")
+
+    # Print summary
+    if results:
+        total_duration = sum(r["duration"] for r in results)
+        total_inference = sum(r["inference_time"] for r in results)
+        avg_rtf = total_inference / total_duration
+
+        print(f"\n{'='*50}")
+        print(f"NPU Performance Summary")
+        print(f"{'='*50}")
+        print(f"Model load time: {init_time:.1f}s")
+        print(f"Files tested: {len(results)}")
+        print(f"Total audio: {total_duration:.1f}s")
+        print(f"Total inference: {total_inference:.1f}s")
+        print(f"Average RTF: {avg_rtf:.2f}x {'[Faster than real-time]' if avg_rtf < 1 else '[Slower than real-time]'}")
+
+        print(f"\nResults:")
+        for r in results:
+            trans = r['transcription'].strip()
+            if len(trans) > 60:
+                trans = trans[:57] + "..."
+            print(f"- {Path(r['file']).name}: \"{trans}\"")
+
+
+if __name__ == "__main__":
+    main()
+```
 ```
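
In the added benchmark script, RTF (real-time factor) is inference time divided by audio duration, so an average RTF below 1.0 means the NPU transcribes faster than real time. For a quick smoke test without the benchmarking harness, the same `openvino_genai` calls can be used directly. The snippet below is a minimal sketch, not part of the commit: it assumes the model snapshot already exists at `models/whisper-large-v3-turbo-int8-ov-npu` (the path `download_model()` produces) and uses `audio.wav` as a placeholder for any local recording.

```python
import librosa
import openvino_genai

# Assumption: the model was already downloaded, e.g. via download_model() above.
model_path = "models/whisper-large-v3-turbo-int8-ov-npu"  # hypothetical local path

# Load the INT8 IR onto the NPU device.
pipe = openvino_genai.WhisperPipeline(model_path, "NPU")

# "audio.wav" is a placeholder file; librosa resamples it to the 16 kHz Whisper expects.
raw_speech, _ = librosa.load("audio.wav", sr=16000)

config = pipe.get_generation_config()
config.language = "<|en|>"
config.task = "transcribe"

print(pipe.generate(raw_speech.tolist(), config))
```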