mingl committed on
Commit
261aad4
·
verified ·
1 Parent(s): b0ff397

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +13 -0
  2. main.py +120 -0
  3. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CPU-only image for the SenseVoice ONNX FastAPI service started by main.py.
# requirements.txt pins numpy==1.26.4, which requires Python >= 3.9, so the
# python:3.8 base image cannot install the requirements.
FROM python:3.10-slim

WORKDIR /app

COPY requirements.txt .
COPY main.py .
# Local model directory; main.py loads "iic/SenseVoiceSmall" from it.
COPY iic iic/

RUN pip install --upgrade pip
# The requirement must be quoted: in shell form an unquoted ">=1.13" is parsed
# as an output redirection to a file named "=1.13", dropping the version bound.
RUN pip install "torch>=1.13" torchaudio --index-url https://download.pytorch.org/whl/cpu
RUN pip install -r requirements.txt

CMD ["python", "main.py"]
main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Author: 一铭
4
+ Date : 2024-08-28
5
+
6
+ Github: https://github.com/HG-ha
7
+ Home : https://api2.wer.plus
8
+
9
+ Description:
10
+ Based on the Alibaba DAMO Academy project: https://github.com/FunAudioLLM/SenseVoice
11
+
12
+ This program wraps the ONNX model in a FastAPI service and provides an interface for reading audio from a network URL or an uploaded file and predicting its content.
13
+
14
+ If you need to use CUDA, install the onnxruntime-gpu package instead of onnxruntime.
15
+ """
16
+
17
+ import librosa
18
+ import numpy as np
19
+ import aiohttp
20
+ from fastapi import FastAPI, Form, UploadFile, HTTPException
21
+ from pydantic import HttpUrl, ValidationError, BaseModel, Field
22
+ from typing import List, Union
23
+ from funasr_onnx import SenseVoiceSmall
24
+ from funasr_onnx.utils.postprocess_utils import rich_transcription_postprocess
25
+ from io import BytesIO
26
+
27
+
28
class ApiResponse(BaseModel):
    """Response body of the ``/extract_text`` endpoint."""

    # Human-readable status of the request.
    message: str = Field(
        ...,
        description="Status message indicating the success of the operation.",
    )
    # Model output passed through rich_transcription_postprocess.
    results: str = Field(
        ...,
        description="Remove label output",
    )
    # First raw result returned by the model.
    label_result: str = Field(
        ...,
        description="Default output",
    )
32
+
33
+
34
+ app = FastAPI()
35
+
36
async def from_url_load_audio(audio: HttpUrl) -> BytesIO:
    """Download the audio at *audio* and return it as an in-memory file.

    Args:
        audio: URL of the audio resource. The endpoint passes it as ``str``;
            it is coerced with ``str()`` so a URL object works as well.

    Returns:
        A ``BytesIO`` wrapping the downloaded payload.

    Raises:
        HTTPException: With status 400 when the remote server does not
            answer with HTTP 200.
    """
    # Browser-like User-Agent; presumably some hosts reject the default
    # aiohttp one -- TODO confirm whether it is still needed.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
    }
    # NOTE(review): the URL comes straight from the request; consider
    # restricting targets (SSRF) and adding an explicit timeout/size limit.
    async with aiohttp.ClientSession() as session:
        async with session.get(str(audio), headers=headers) as response:
            if response.status != 200:
                raise HTTPException(
                    status_code=400,
                    detail=f"Failed to download audio: {response.status}",
                )
            audio_bytes = await response.read()
    return BytesIO(audio_bytes)
51
+
52
@app.post("/extract_text", response_model=ApiResponse)
async def upload_url(url: Union[HttpUrl, None] = Form(None), file: Union[UploadFile, None] = Form(None)):
    """Transcribe audio supplied either as an uploaded file or as a URL.

    The uploaded ``file`` takes precedence over ``url`` when both are given.
    Relies on the module-level ``model`` and ``language`` globals, which are
    only assigned when this file is executed as a script.

    Args:
        url: Optional URL to download the audio from.
        file: Optional uploaded audio file.

    Returns:
        A dict matching ``ApiResponse``: ``message``, ``results`` (the
        post-processed text) and ``label_result`` (the raw model output).

    Raises:
        HTTPException: 400 when no audio source is provided or the download
            is rejected; 500 for any other download or inference failure.
    """
    # NOTE(review): `file` is declared with Form(...); FastAPI documents
    # File(...) for uploads -- confirm and switch if the OpenAPI schema matters.
    if file:
        audio = BytesIO(await file.read())
    elif url:
        try:
            audio = await from_url_load_audio(str(url))
        except HTTPException:
            # Keep the status chosen by the downloader (e.g. 400) instead of
            # masking it as a 500 in the generic handler below.
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e
    else:
        # Must be raised, not returned: a returned HTTPException is treated
        # as the response body and fails response_model validation.
        raise HTTPException(400, {"error": "No valid audio source provided."})

    try:
        res = model(audio, language=language, use_itn=True)
        return {
            "message": "input processed successfully",
            "results": rich_transcription_postprocess(res[0]),
            "label_result": res[0],
        }
    except ValidationError as e:
        raise HTTPException(status_code=400, detail=e.errors()) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
75
+
76
+
77
if __name__ == "__main__":
    # Script entry point: configure and load the model, patch its audio
    # loader, then serve the FastAPI app with uvicorn.

    model_dir = "iic/SenseVoiceSmall"
    device_id = 0  # Use GPU 0, automatically use CPU when not available
    batch_size = 16
    language = "auto"
    quantize = True  # Quantization model, small size, fast speed, accuracy may be insufficient: model_quant.onnx
    # quantize = False  # Standard model: model.onnx

    # Override the built-in load_data method. Per the original author, passing
    # the librosa.load result directly as an np.ndarray made accuracy for
    # other languages extremely poor; the cause is not known.
    def load_data(self, wav_content: Union[str, np.ndarray, List[str], BytesIO], fs: Union[int, None] = None) -> List:
        """Return the input audio as a list of waveforms.

        Args:
            wav_content: A waveform array (returned as-is), a file path, a
                list of file paths, or a ``BytesIO`` holding an audio file.
            fs: Target sample rate forwarded to ``librosa.load`` as ``sr``.

        Returns:
            A list with one waveform per input item.

        Raises:
            TypeError: If ``wav_content`` is none of the supported types.
        """

        def load_wav(source) -> np.ndarray:
            # librosa.load accepts both paths and file-like objects.
            waveform, _ = librosa.load(source, sr=fs)
            return waveform

        if isinstance(wav_content, np.ndarray):
            return [wav_content]

        if isinstance(wav_content, (str, BytesIO)):
            return [load_wav(wav_content)]

        if isinstance(wav_content, list):
            return [load_wav(path) for path in wav_content]

        # Report the offending type, not the (possibly huge) value, and list
        # every type actually accepted above.
        raise TypeError(
            f"The type {type(wav_content).__name__!r} is not in [str, np.ndarray, list, BytesIO]"
        )

    SenseVoiceSmall.load_data = load_data

    model = SenseVoiceSmall(
        model_dir,
        quantize=quantize,
        device_id=device_id,
        batch_size=batch_size,
    )

    print("\n\nDocs: http://127.0.0.1:8000/docs\n")
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ funasr_onnx==0.4.1
2
+ fastapi==0.112.2
3
+ numpy==1.26.4
4
+ uvicorn==0.30.6
5
+ librosa==0.10.2
6
+ aiohttp==3.10.5
7
+ python-multipart==0.0.9