diff --git a/backend/apps/openai/main.py b/backend/apps/openai/main.py
index 780475ad..bed9181a 100644
--- a/backend/apps/openai/main.py
+++ b/backend/apps/openai/main.py
@@ -1,15 +1,19 @@
 from fastapi import FastAPI, Request, Response, HTTPException, Depends
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse, JSONResponse
+from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
 
 import requests
 import json
 from pydantic import BaseModel
+
 
 from apps.web.models.users import Users
 from constants import ERROR_MESSAGES
 from utils.utils import decode_token, get_current_user
-from config import OPENAI_API_BASE_URL, OPENAI_API_KEY
+from config import OPENAI_API_BASE_URL, OPENAI_API_KEY, CACHE_DIR
+
+import hashlib
+from pathlib import Path
 
 app = FastAPI()
 app.add_middleware(
@@ -66,6 +70,68 @@ async def update_openai_key(form_data: KeyUpdateForm, user=Depends(get_current_u
         raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
 
 
+@app.post("/audio/speech")
+async def speech(request: Request, user=Depends(get_current_user)):
+    target_url = f"{app.state.OPENAI_API_BASE_URL}/audio/speech"
+
+    if user.role not in ["user", "admin"]:
+        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
+    if app.state.OPENAI_API_KEY == "":
+        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
+
+    body = await request.body()
+
+    name = hashlib.sha256(body).hexdigest()
+
+    SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
+    SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
+    file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
+
+    # Check if the file already exists in the cache
+    if file_path.is_file():
+        return FileResponse(file_path)
+
+    headers = {}
+    headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
+    headers["Content-Type"] = "application/json"
+
+    try:
+        print("openai")
+        r = requests.post(
+            url=target_url,
+            data=body,
+            headers=headers,
+            stream=True,
+        )
+
+        r.raise_for_status()
+
+        # Save the streaming content to a file
+        with open(file_path, "wb") as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+
+        with open(file_body_path, "w") as f:
+            json.dump(json.loads(body.decode("utf-8")), f)
+
+        # Return the saved file
+        return FileResponse(file_path)
+
+    except Exception as e:
+        print(e)
+        error_detail = "Ollama WebUI: Server Connection Error"
+        if r is not None:
+            try:
+                res = r.json()
+                if "error" in res:
+                    error_detail = f"External: {res['error']}"
+            except:
+                error_detail = f"External: {e}"
+
+        raise HTTPException(status_code=r.status_code, detail=error_detail)
+
+
 @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
 async def proxy(path: str, request: Request, user=Depends(get_current_user)):
     target_url = f"{app.state.OPENAI_API_BASE_URL}/{path}"
@@ -129,8 +195,6 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
 
             response_data = r.json()
 
-            print(type(response_data))
-
             if "openai" in app.state.OPENAI_API_BASE_URL and path == "models":
                 response_data["data"] = list(
                     filter(lambda model: "gpt" in model["id"], response_data["data"])
diff --git a/backend/config.py b/backend/config.py
index 273a4dfb..cb501988 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -35,6 +35,14 @@ FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
 
 UPLOAD_DIR = f"{DATA_DIR}/uploads"
 Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
+
+####################################
+# Cache DIR
+####################################
+
+CACHE_DIR = f"{DATA_DIR}/cache"
+Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
+
 ####################################
 # OLLAMA_API_BASE_URL
 ####################################
diff --git a/src/lib/apis/openai/index.ts b/src/lib/apis/openai/index.ts
index edebf693..f9173f70 100644
--- a/src/lib/apis/openai/index.ts
+++ b/src/lib/apis/openai/index.ts
@@ -229,3 +229,34 @@ export const generateOpenAIChatCompletion = async (token: string = '', body: obj
 
 	return res;
 };
+
+export const synthesizeOpenAISpeech = async (
+	token: string = '',
+	speaker: string = 'alloy',
+	text: string = ''
+) => {
+	let error = null;
+
+	const res = await fetch(`${OPENAI_API_BASE_URL}/audio/speech`, {
+		method: 'POST',
+		headers: {
+			Authorization: `Bearer ${token}`,
+			'Content-Type': 'application/json'
+		},
+		body: JSON.stringify({
+			model: 'tts-1',
+			input: text,
+			voice: speaker
+		})
+	}).catch((err) => {
+		console.log(err);
+		error = err;
+		return null;
+	});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte
index c28eaffa..7cd9da38 100644
--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@@ -1,7 +1,8 @@
@@ -29,24 +51,52 @@
 		class="flex flex-col h-full justify-between space-y-3 text-sm"
 		on:submit|preventDefault={() => {
 			saveSettings({
-				speakVoice: speakVoice !== '' ? speakVoice : undefined
+				speech: {
+					engine: engine !== '' ? engine : undefined,
+					speaker: speaker !== '' ? speaker : undefined
+				}
 			});
 			dispatch('save');
 		}}
 	>
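
The ResponseMessage.svelte and settings hunks above are truncated, so as a rough orientation only, here is a minimal sketch (not taken from this diff) of how a frontend caller might consume the new synthesizeOpenAISpeech helper and play the MP3 returned by the proxied /audio/speech route. The $lib/apis/openai import path and the speakMessage wrapper are assumptions for illustration.

// Minimal usage sketch, not part of this diff: `speakMessage` and the
// `$lib/apis/openai` import path are assumptions for illustration.
import { synthesizeOpenAISpeech } from '$lib/apis/openai';

const speakMessage = async (token: string, text: string, speaker: string = 'alloy') => {
	// Ask the backend proxy for TTS audio; repeated requests with the same body
	// are served from the backend's file cache instead of hitting OpenAI again.
	const res = await synthesizeOpenAISpeech(token, speaker, text).catch((error) => {
		console.log(error);
		return null;
	});

	if (res) {
		// The route returns the cached MP3 as a file response, so it can be
		// played directly from a blob URL.
		const blob = await res.blob();
		const audio = new Audio(URL.createObjectURL(blob));
		audio.play();
	}
};

Because the backend keys its cache on a SHA-256 hash of the request body, identical model/voice/text combinations resolve to the same cached file and skip the upstream OpenAI call.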