Upload 3 files
Browse files- Dockerfile +13 -0
- main.py +120 -0
- requirements.txt +7 -0
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# CPU-only image for the SenseVoice ONNX FastAPI transcription service.
FROM python:3.8-slim

WORKDIR /app

COPY requirements.txt .
COPY main.py .
COPY iic iic/

RUN pip install --upgrade pip
# Bug fix: the version specifier must be quoted. Unquoted, the shell parses
# `torch>=1.13` as `pip install torch` with stdout redirected into a file
# named `=1.13`, silently dropping the version constraint.
RUN pip install "torch>=1.13" torchaudio --index-url https://download.pytorch.org/whl/cpu
RUN pip install -r requirements.txt

CMD ["python", "main.py"]
|
main.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""
|
3 |
+
Author: 一铭
|
4 |
+
Date : 2024-08-28
|
5 |
+
|
6 |
+
Github: https://github.com/HG-ha
|
7 |
+
Home : https://api2.wer.plus
|
8 |
+
|
9 |
+
Description:
|
10 |
+
From ali dharma school project: https://github.com/FunAudioLLM/SenseVoice
|
11 |
+
|
12 |
+
This program is distributed using ONNX-encapsulated fastapi,Provides an interface for reading audio from a network or file and predicting content.
|
13 |
+
|
14 |
+
If you need to use cuda, you need to install the OnnxRun-time gpu, not the onnxruntime.
|
15 |
+
"""
|
16 |
+
|
17 |
+
import librosa
|
18 |
+
import numpy as np
|
19 |
+
import aiohttp
|
20 |
+
from fastapi import FastAPI, Form, UploadFile, HTTPException
|
21 |
+
from pydantic import HttpUrl, ValidationError, BaseModel, Field
|
22 |
+
from typing import List, Union
|
23 |
+
from funasr_onnx import SenseVoiceSmall
|
24 |
+
from funasr_onnx.utils.postprocess_utils import rich_transcription_postprocess
|
25 |
+
from io import BytesIO
|
26 |
+
|
27 |
+
|
28 |
+
# Response schema for POST /extract_text. Field order and the Field
# descriptions are part of the public OpenAPI contract — do not reorder.
class ApiResponse(BaseModel):
    # Human-readable status of the request (e.g. "input processed successfully").
    message: str = Field(..., description="Status message indicating the success of the operation.")
    # Transcription with SenseVoice event/emotion/language tags stripped
    # by rich_transcription_postprocess.
    results: str = Field(..., description="Remove label output")
    # Raw model output, tags included.
    label_result: str = Field(..., description="Default output")
|
32 |
+
|
33 |
+
|
34 |
+
app = FastAPI()
|
35 |
+
|
36 |
+
async def from_url_load_audio(audio: str) -> BytesIO:
    """Download an audio file from the URL *audio* and return it as BytesIO.

    A browser-like User-Agent is sent because some hosts reject default
    client UAs.

    Raises:
        HTTPException(400): when the server answers with a non-200 status.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(
            audio,
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
            },
        ) as response:
            if response.status != 200:
                # Bug fix: the payload is audio, not an image (copy-paste
                # error in the original message).
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to download audio: {response.status}",
                )
            # Bug fix: return annotation was `np.array`, but the function
            # has always returned a BytesIO wrapper around the raw bytes.
            audio_bytes = await response.read()
            return BytesIO(audio_bytes)
|
51 |
+
|
52 |
+
@app.post("/extract_text", response_model=ApiResponse)
async def upload_url(url: Union[HttpUrl, None] = Form(None), file: Union[UploadFile, None] = Form(None)):
    """Transcribe audio supplied either as an uploaded file or as a URL.

    *file* takes precedence when both are given. Returns the post-processed
    transcription and the raw labelled model output (see ApiResponse).

    Raises:
        HTTPException(400): no audio source provided, or validation failed.
        HTTPException(500): download or inference error.
    """
    if file:
        audio = BytesIO(await file.read())
    elif url:
        try:
            audio = await from_url_load_audio(str(url))
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
    else:
        # Bug fix: the original *returned* the HTTPException instance, which
        # FastAPI would then try to validate against ApiResponse (producing a
        # 500 with a validation error) instead of sending the intended 400.
        raise HTTPException(status_code=400, detail={"error": "No valid audio source provided."})
    try:
        # NOTE(review): `model` and `language` are module globals created in
        # the `if __name__ == "__main__"` block, so this endpoint only works
        # when main.py is run as a script — confirm before importing the app
        # from elsewhere (e.g. `uvicorn main:app`).
        res = model(audio, language=language, use_itn=True)
        return {
            "message": "input processed successfully",
            "results": rich_transcription_postprocess(res[0]),
            "label_result": res[0],
        }
    except ValidationError as e:
        raise HTTPException(status_code=400, detail=e.errors())
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
75 |
+
|
76 |
+
|
77 |
+
if __name__ == "__main__":

    model_dir = "iic/SenseVoiceSmall"
    device_id = 0  # Use GPU 0; falls back to CPU automatically when unavailable
    batch_size = 16
    language = "auto"  # read as a module global by the /extract_text endpoint
    quantize = True  # Quantized model (model_quant.onnx): smaller and faster, accuracy may suffer
    # quantize = False  # Standard model: model.onnx

    # Override the built-in load_data method to fix an accuracy bug with
    # np.ndarray input: the original re-processed arrays (and librosa.load
    # results passed directly), which made accuracy for non-default
    # languages extremely poor. Here ndarrays pass through untouched and
    # only paths / file-like objects are decoded with librosa.
    def load_data(self, wav_content: Union[str, np.ndarray, List[str], BytesIO], fs: int = None) -> List:
        """Normalize *wav_content* into a list of waveform ndarrays.

        Accepts an ndarray (returned as-is), a file path, a list of file
        paths, or a BytesIO containing encoded audio; *fs* is the target
        sample rate passed to librosa.load.
        """
        def load_wav(source) -> np.ndarray:
            # librosa.load accepts both path strings and file-like objects.
            waveform, _ = librosa.load(source, sr=fs)
            return waveform

        if isinstance(wav_content, np.ndarray):
            return [wav_content]
        if isinstance(wav_content, str):
            return [load_wav(wav_content)]
        if isinstance(wav_content, list):
            return [load_wav(path) for path in wav_content]
        if isinstance(wav_content, BytesIO):
            return [load_wav(wav_content)]
        # Bug fix: the accepted-types list in the message omitted BytesIO,
        # which the branch above clearly handles.
        raise TypeError(f"The type of {wav_content} is not in [str, np.ndarray, list, BytesIO]")

    # Monkeypatch the class so every SenseVoiceSmall instance uses the fix.
    SenseVoiceSmall.load_data = load_data

    model = SenseVoiceSmall(
        model_dir,
        quantize=quantize,
        device_id=device_id,
        batch_size=batch_size,
    )

    print("\n\nDocs: http://127.0.0.1:8000/docs\n")
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
funasr_onnx==0.4.1
|
2 |
+
fastapi==0.112.2
|
3 |
+
numpy==1.26.4
|
4 |
+
uvicorn==0.30.6
|
5 |
+
librosa==0.10.2
|
6 |
+
aiohttp==3.10.5
|
7 |
+
python-multipart==0.0.9
|