Merge pull request #656 from ollama-webui/openai-voice

feat: openai tts support
This commit is contained in:
Timothy Jaeryang Baek 2024-02-05 23:15:45 -08:00 committed by GitHub
commit 7f3ba3d2ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 284 additions and 55 deletions

View file

@ -1,15 +1,19 @@
from fastapi import FastAPI, Request, Response, HTTPException, Depends from fastapi import FastAPI, Request, Response, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
import requests import requests
import json import json
from pydantic import BaseModel from pydantic import BaseModel
from apps.web.models.users import Users from apps.web.models.users import Users
from constants import ERROR_MESSAGES from constants import ERROR_MESSAGES
from utils.utils import decode_token, get_current_user from utils.utils import decode_token, get_current_user
from config import OPENAI_API_BASE_URL, OPENAI_API_KEY from config import OPENAI_API_BASE_URL, OPENAI_API_KEY, CACHE_DIR
import hashlib
from pathlib import Path
app = FastAPI() app = FastAPI()
app.add_middleware( app.add_middleware(
@ -66,6 +70,68 @@ async def update_openai_key(form_data: KeyUpdateForm, user=Depends(get_current_u
raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED) raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
@app.post("/audio/speech")
async def speech(request: Request, user=Depends(get_current_user)):
    """Proxy a text-to-speech request to the configured backend, with a disk cache.

    The raw request body is hashed with SHA-256 and the digest is used as the
    cache key: ``<CACHE_DIR>/audio/speech/<digest>.mp3`` holds the audio and
    ``<digest>.json`` holds the request body that produced it. A cache hit is
    served directly without contacting the backend.

    Args:
        request: Incoming request; its raw body is forwarded unchanged.
        user: Authenticated user resolved by ``get_current_user``.

    Returns:
        A ``FileResponse`` for the cached (or freshly downloaded) audio file.

    Raises:
        HTTPException: 401 when the user role is not allowed or no API key is
            configured; otherwise the backend's status code (or 500 when the
            backend could not be reached) with a descriptive detail.
    """
    # Function-scope import: only this handler needs it.
    import tempfile

    target_url = f"{app.state.OPENAI_API_BASE_URL}/audio/speech"

    if user.role not in ["user", "admin"]:
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
    if app.state.OPENAI_API_KEY == "":
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)

    body = await request.body()
    name = hashlib.sha256(body).hexdigest()

    SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
    SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
    file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")

    # Check if the file already exists in the cache
    if file_path.is_file():
        return FileResponse(file_path)

    headers = {
        "Authorization": f"Bearer {app.state.OPENAI_API_KEY}",
        "Content-Type": "application/json",
    }

    # Both must exist before the try block: the error handler inspects them
    # even when the request itself never produced a response.
    r = None
    tmp_path = None
    try:
        r = requests.post(
            url=target_url,
            data=body,
            headers=headers,
            stream=True,
        )
        r.raise_for_status()

        # Stream into a uniquely named temporary file and rename it into place
        # only once complete, so an interrupted download can never be served
        # later as a (truncated) cache hit.
        with tempfile.NamedTemporaryFile(
            "wb", dir=str(SPEECH_CACHE_DIR), suffix=".part", delete=False
        ) as f:
            tmp_path = Path(f.name)
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        tmp_path.replace(file_path)

        with open(file_body_path, "w") as f:
            json.dump(json.loads(body.decode("utf-8")), f)

        # Return the saved file
        return FileResponse(file_path)

    except Exception as e:
        print(e)

        # Drop any partial download left behind by a failed stream.
        if tmp_path is not None and tmp_path.exists():
            tmp_path.unlink()

        error_detail = "Ollama WebUI: Server Connection Error"
        status_code = 500
        if r is not None:
            # Never report a success code for a failure that happened locally
            # after the backend had already answered.
            status_code = r.status_code if r.status_code >= 400 else 500
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"External: {res['error']}"
            except Exception:
                error_detail = f"External: {e}"

        raise HTTPException(status_code=status_code, detail=error_detail)
    finally:
        # A streamed response holds its connection until explicitly closed.
        if r is not None:
            r.close()
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"]) @app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_current_user)): async def proxy(path: str, request: Request, user=Depends(get_current_user)):
target_url = f"{app.state.OPENAI_API_BASE_URL}/{path}" target_url = f"{app.state.OPENAI_API_BASE_URL}/{path}"
@ -129,8 +195,6 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
response_data = r.json() response_data = r.json()
print(type(response_data))
if "openai" in app.state.OPENAI_API_BASE_URL and path == "models": if "openai" in app.state.OPENAI_API_BASE_URL and path == "models":
response_data["data"] = list( response_data["data"] = list(
filter(lambda model: "gpt" in model["id"], response_data["data"]) filter(lambda model: "gpt" in model["id"], response_data["data"])

View file

@ -35,6 +35,14 @@ FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
UPLOAD_DIR = f"{DATA_DIR}/uploads" UPLOAD_DIR = f"{DATA_DIR}/uploads"
Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True) Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
####################################
# Cache DIR
####################################

# Cache root, located under DATA_DIR.
CACHE_DIR = f"{DATA_DIR}/cache"
# Created at import time (including missing parents); a no-op if it already exists.
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
#################################### ####################################
# OLLAMA_API_BASE_URL # OLLAMA_API_BASE_URL
#################################### ####################################

View file

@ -229,3 +229,34 @@ export const generateOpenAIChatCompletion = async (token: string = '', body: obj
return res; return res;
}; };
/**
 * Request synthesized speech for `text` from the backend's `/audio/speech` route.
 *
 * @param token   Bearer token sent in the `Authorization` header.
 * @param speaker Voice name forwarded as the `voice` field (defaults to 'alloy').
 * @param text    Text forwarded as the `input` field.
 * @returns The successful `Response`; the caller reads the audio via `res.blob()`.
 * @throws The backend's error `detail` (or the raw error) when the request fails
 *         or the server answers with a non-2xx status.
 */
export const synthesizeOpenAISpeech = async (
	token: string = '',
	speaker: string = 'alloy',
	text: string = ''
) => {
	let error = null;

	const res = await fetch(`${OPENAI_API_BASE_URL}/audio/speech`, {
		method: 'POST',
		headers: {
			Authorization: `Bearer ${token}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({
			model: 'tts-1',
			input: text,
			voice: speaker
		})
	})
		.then(async (res) => {
			// fetch() only rejects on network failure; an HTTP error status must be
			// turned into an error here, otherwise the caller would try to play a
			// JSON error body as audio.
			if (!res.ok) throw await res.json();
			return res;
		})
		.catch((err) => {
			console.log(err);
			// Prefer the backend's `detail` message when the error body carries one.
			error = err?.detail ?? err;
			return null;
		});

	if (error) {
		throw error;
	}

	return res;
};

View file

@ -1,7 +1,8 @@
<script lang="ts"> <script lang="ts">
import toast from 'svelte-french-toast';
import dayjs from 'dayjs'; import dayjs from 'dayjs';
import { marked } from 'marked'; import { marked } from 'marked';
import { settings, voices } from '$lib/stores'; import { settings } from '$lib/stores';
import tippy from 'tippy.js'; import tippy from 'tippy.js';
import auto_render from 'katex/dist/contrib/auto-render.mjs'; import auto_render from 'katex/dist/contrib/auto-render.mjs';
import 'katex/dist/katex.min.css'; import 'katex/dist/katex.min.css';
@ -13,6 +14,8 @@
import Skeleton from './Skeleton.svelte'; import Skeleton from './Skeleton.svelte';
import CodeBlock from './CodeBlock.svelte'; import CodeBlock from './CodeBlock.svelte';
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
export let modelfiles = []; export let modelfiles = [];
export let message; export let message;
export let siblings; export let siblings;
@ -31,7 +34,10 @@
let editedContent = ''; let editedContent = '';
let tooltipInstance = null; let tooltipInstance = null;
let audioMap = {};
let speaking = null; let speaking = null;
let loadingSpeech = false;
$: tokens = marked.lexer(message.content); $: tokens = marked.lexer(message.content);
@ -114,13 +120,59 @@
if (speaking) { if (speaking) {
speechSynthesis.cancel(); speechSynthesis.cancel();
speaking = null; speaking = null;
audioMap[message.id].pause();
audioMap[message.id].currentTime = 0;
} else { } else {
speaking = true; speaking = true;
if ($settings?.speech?.engine === 'openai') {
loadingSpeech = true;
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
message.content
).catch((error) => {
toast.error(error);
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
console.log(blobUrl);
loadingSpeech = false;
const audio = new Audio(blobUrl);
audioMap[message.id] = audio;
audio.onended = () => {
speaking = null;
};
audio.play().catch((e) => console.error('Error playing audio:', e));
}
} else {
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices?.filter((v) => v.name === $settings?.speech?.speaker)?.at(0) ?? undefined;
const speak = new SpeechSynthesisUtterance(message.content); const speak = new SpeechSynthesisUtterance(message.content);
const voice = $voices?.filter((v) => v.name === $settings?.speakVoice)?.at(0) ?? undefined;
speak.onend = () => {
speaking = null;
};
speak.voice = voice; speak.voice = voice;
speechSynthesis.speak(speak); speechSynthesis.speak(speak);
} }
}, 100);
}
}
}; };
const editMessageHandler = async () => { const editMessageHandler = async () => {
@ -410,10 +462,42 @@
? 'visible' ? 'visible'
: 'invisible group-hover:visible'} p-1 rounded dark:hover:bg-gray-800 transition" : 'invisible group-hover:visible'} p-1 rounded dark:hover:bg-gray-800 transition"
on:click={() => { on:click={() => {
if (!loadingSpeech) {
toggleSpeakMessage(message); toggleSpeakMessage(message);
}
}} }}
> >
{#if speaking} {#if loadingSpeech}
<svg
class=" w-4 h-4"
fill="currentColor"
viewBox="0 0 24 24"
xmlns="http://www.w3.org/2000/svg"
><style>
.spinner_S1WN {
animation: spinner_MGfb 0.8s linear infinite;
animation-delay: -0.8s;
}
.spinner_Km9P {
animation-delay: -0.65s;
}
.spinner_JApP {
animation-delay: -0.5s;
}
@keyframes spinner_MGfb {
93.75%,
100% {
opacity: 0.2;
}
}
</style><circle class="spinner_S1WN" cx="4" cy="12" r="3" /><circle
class="spinner_S1WN spinner_Km9P"
cx="12"
cy="12"
r="3"
/><circle class="spinner_S1WN spinner_JApP" cx="20" cy="12" r="3" /></svg
>
{:else if speaking}
<svg <svg
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
fill="none" fill="none"

View file

@ -1,27 +1,49 @@
<script lang="ts"> <script lang="ts">
import { createEventDispatcher, onMount } from 'svelte'; import { createEventDispatcher, onMount } from 'svelte';
import { voices } from '$lib/stores';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
export let saveSettings: Function; export let saveSettings: Function;
// Voice // Voice
let speakVoice = ''; let engines = ['', 'openai'];
let engine = '';
let voices = [];
let speaker = '';
// Populate `voices` with the fixed set of voice names offered for the OpenAI engine.
const getOpenAIVoices = () => {
	const voiceNames = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'];
	voices = voiceNames.map((name) => ({ name }));
};
// Populate `voices` from the browser's speechSynthesis API, re-checking every
// 100 ms until getVoices() returns a non-empty list.
const getWebAPIVoices = () => {
	let pollTimer;

	const refreshVoices = async () => {
		voices = await speechSynthesis.getVoices();
		if (voices.length === 0) {
			// Nothing yet; leave the timer running for another attempt.
			return;
		}
		clearInterval(pollTimer);
	};

	pollTimer = setInterval(refreshVoices, 100);
};
onMount(async () => { onMount(async () => {
let settings = JSON.parse(localStorage.getItem('settings') ?? '{}'); let settings = JSON.parse(localStorage.getItem('settings') ?? '{}');
speakVoice = settings.speakVoice ?? ''; engine = settings?.speech?.engine ?? '';
speaker = settings?.speech?.speaker ?? '';
const getVoicesLoop = setInterval(async () => { if (engine === 'openai') {
const _voices = await speechSynthesis.getVoices(); getOpenAIVoices();
await voices.set(_voices); } else {
getWebAPIVoices();
// do your loop
if (_voices.length > 0) {
clearInterval(getVoicesLoop);
} }
}, 100);
}); });
</script> </script>
@ -29,24 +51,52 @@
class="flex flex-col h-full justify-between space-y-3 text-sm" class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => { on:submit|preventDefault={() => {
saveSettings({ saveSettings({
speakVoice: speakVoice !== '' ? speakVoice : undefined speech: {
engine: engine !== '' ? engine : undefined,
speaker: speaker !== '' ? speaker : undefined
}
}); });
dispatch('save'); dispatch('save');
}} }}
> >
<div class=" space-y-3"> <div class=" space-y-3">
<div class=" space-y-3"> <div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-sm font-medium">Speech Engine</div>
<div class="flex items-center relative">
<select
class="w-fit pr-8 rounded py-2 px-2 text-xs bg-transparent outline-none text-right"
bind:value={engine}
placeholder="Select a mode"
on:change={(e) => {
if (e.target.value === 'openai') {
getOpenAIVoices();
speaker = 'alloy';
} else {
getWebAPIVoices();
speaker = '';
}
}}
>
<option value="">Default (Web API)</option>
<option value="openai">Open AI</option>
</select>
</div>
</div>
<hr class=" dark:border-gray-700" />
{#if engine === ''}
<div> <div>
<div class=" mb-2.5 text-sm font-medium">Set Default Voice</div> <div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<select <select
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none" class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
bind:value={speakVoice} bind:value={speaker}
placeholder="Select a voice" placeholder="Select a voice"
> >
<option value="" selected>Default</option> <option value="" selected>Default</option>
{#each $voices.filter((v) => v.localService === true) as voice} {#each voices.filter((v) => v.localService === true) as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option <option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option
> >
{/each} {/each}
@ -54,32 +104,25 @@
</div> </div>
</div> </div>
</div> </div>
</div> {:else if engine === 'openai'}
<!--
<div> <div>
<div class=" mb-2.5 text-sm font-medium"> <div class=" mb-2.5 text-sm font-medium">Set Voice</div>
Gravatar Email <span class=" text-gray-400 text-sm">(optional)</span>
</div>
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<input <select
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none" class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
placeholder="Enter Your Email" bind:value={speaker}
bind:value={gravatarEmail} placeholder="Select a voice"
autocomplete="off"
type="email"
/>
</div>
</div>
<div class="mt-2 text-xs text-gray-400 dark:text-gray-500">
Changes user profile image to match your <a
class=" text-gray-500 dark:text-gray-300 font-medium"
href="https://gravatar.com/"
target="_blank">Gravatar.</a
> >
{#each voices as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option
>
{/each}
</select>
</div> </div>
</div> --> </div>
</div>
{/if}
</div> </div>
<div class="flex justify-end pt-3 text-sm font-medium"> <div class="flex justify-end pt-3 text-sm font-medium">

View file

@ -12,7 +12,6 @@ export const chatId = writable('');
export const chats = writable([]); export const chats = writable([]);
export const tags = writable([]); export const tags = writable([]);
export const models = writable([]); export const models = writable([]);
export const voices = writable([]);
export const modelfiles = writable([]); export const modelfiles = writable([]);
export const prompts = writable([]); export const prompts = writable([]);