open-webui/backend/apps/audio/main.py

81 lines
1.9 KiB
Python
Raw Normal View History

2024-02-11 09:17:50 +01:00
from fastapi import (
FastAPI,
Request,
Depends,
HTTPException,
status,
UploadFile,
File,
Form,
)
from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel
from constants import ERROR_MESSAGES
from utils.utils import (
decode_token,
get_current_user,
get_verified_user,
get_admin_user,
)
from utils.misc import calculate_sha256
from config import CACHE_DIR, UPLOAD_DIR, WHISPER_MODEL_NAME
# FastAPI application object that the audio routes in this module attach to.
app = FastAPI()

# CORS policy is fully permissive: every origin, HTTP method and request
# header is accepted, and credentialed requests are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.post("/transcribe")
def transcribe(
    file: UploadFile = File(...),
    user=Depends(get_current_user),
):
    """Save an uploaded audio file and return its Whisper transcription.

    Args:
        file: The uploaded audio. Only the content types ``audio/mpeg`` and
            ``audio/wav`` are accepted.
        user: Value produced by the ``get_current_user`` dependency. It is
            not read in the body; declaring it makes the dependency run for
            every request to this route.

    Returns:
        A dict ``{"text": <transcript>}`` where the transcript is the
        concatenation of all segment texts with surrounding whitespace
        stripped.

    Raises:
        HTTPException: 400 with ``ERROR_MESSAGES.FILE_NOT_SUPPORTED`` when the
            content type is not accepted, or 400 with
            ``ERROR_MESSAGES.DEFAULT(e)`` when saving or transcribing fails
            (including an unusable upload filename).
    """
    # Imported locally so this block does not depend on edits to the
    # module-level import section.
    import os
    import shutil

    print(file.content_type)

    if file.content_type not in ["audio/mpeg", "audio/wav"]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
        )

    try:
        # The filename is client-controlled: keep only its final path
        # component so the upload cannot be written outside UPLOAD_DIR
        # (e.g. "../../etc/passwd"). Backslashes are normalised first so
        # Windows-style paths are reduced to their base name as well.
        raw_name = file.filename or ""
        filename = os.path.basename(raw_name.replace("\\", "/"))
        if filename in ("", ".", ".."):
            raise ValueError("Invalid upload filename")

        file_path = f"{UPLOAD_DIR}/{filename}"

        # Stream the upload to disk in chunks instead of loading the whole
        # file into memory; the context manager closes the destination file.
        with open(file_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        # NOTE(review): the model is constructed on every request; consider
        # caching it at module level if load time becomes a problem.
        model_name = WHISPER_MODEL_NAME
        model = WhisperModel(
            model_name,
            device="cpu",
            compute_type="int8",
            download_root=f"{CACHE_DIR}/whisper/models",
        )

        segments, info = model.transcribe(file_path, beam_size=5)
        print(
            "Detected language '%s' with probability %f"
            % (info.language, info.language_probability)
        )

        # Iterate the segments directly; no intermediate list is needed.
        transcript = "".join(segment.text for segment in segments)

        return {"text": transcript.strip()}

    except Exception as e:
        # Route-level boundary: report any failure to the client as a 400.
        print(e)

        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        ) from e