feat: external openai tts support

This commit is contained in:
Timothy J. Baek 2024-04-20 16:00:24 -05:00
parent 713934edb6
commit cbd18ec63c
5 changed files with 187 additions and 74 deletions

View file

@@ -101,61 +101,57 @@ async def update_openai_config(
@app.post("/speech") @app.post("/speech")
async def speech(request: Request, user=Depends(get_verified_user)): async def speech(request: Request, user=Depends(get_verified_user)):
idx = None body = await request.body()
name = hashlib.sha256(body).hexdigest()
file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
# Check if the file already exists in the cache
if file_path.is_file():
return FileResponse(file_path)
headers = {}
headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
headers["Content-Type"] = "application/json"
r = None
try: try:
body = await request.body() r = requests.post(
name = hashlib.sha256(body).hexdigest() url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
data=body,
headers=headers,
stream=True,
)
file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3") r.raise_for_status()
file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
# Check if the file already exists in the cache # Save the streaming content to a file
if file_path.is_file(): with open(file_path, "wb") as f:
return FileResponse(file_path) for chunk in r.iter_content(chunk_size=8192):
f.write(chunk)
headers = {} with open(file_body_path, "w") as f:
headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}" json.dump(json.loads(body.decode("utf-8")), f)
headers["Content-Type"] = "application/json"
r = None # Return the saved file
try: return FileResponse(file_path)
r = requests.post(
url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
data=body,
headers=headers,
stream=True,
)
r.raise_for_status() except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
if "error" in res:
error_detail = f"External: {res['error']['message']}"
except:
error_detail = f"External: {e}"
# Save the streaming content to a file raise HTTPException(
with open(file_path, "wb") as f: status_code=r.status_code if r != None else 500,
for chunk in r.iter_content(chunk_size=8192): detail=error_detail,
f.write(chunk) )
with open(file_body_path, "w") as f:
json.dump(json.loads(body.decode("utf-8")), f)
# Return the saved file
return FileResponse(file_path)
except Exception as e:
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
res = r.json()
if "error" in res:
error_detail = f"External: {res['error']}"
except:
error_detail = f"External: {e}"
raise HTTPException(
status_code=r.status_code if r else 500, detail=error_detail
)
except ValueError:
raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
@app.post("/transcriptions") @app.post("/transcriptions")

View file

@@ -1,5 +1,67 @@
import { AUDIO_API_BASE_URL } from '$lib/constants'; import { AUDIO_API_BASE_URL } from '$lib/constants';
/**
 * Fetch the current audio (TTS/STT) configuration from the backend.
 * Resolves with the config object, or throws the server-provided
 * `detail` message when the request fails with an error payload.
 */
export const getAudioConfig = async (token: string) => {
	let error = null;
	let config = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/config`, {
			method: 'GET',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			}
		});

		if (!response.ok) {
			// Backend reports failures as a JSON body carrying a `detail` field.
			throw await response.json();
		}

		config = await response.json();
	} catch (err) {
		console.log(err);
		error = err.detail;
	}

	if (error) {
		throw error;
	}

	return config;
};
type OpenAIConfigForm = {
	url: string;
	key: string;
};

/**
 * Persist the external OpenAI TTS endpoint settings on the backend.
 * Resolves with the updated config as stored by the server, or throws
 * the server-provided `detail` message when the request fails.
 */
export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => {
	let error = null;
	let config = null;

	try {
		const response = await fetch(`${AUDIO_API_BASE_URL}/config/update`, {
			method: 'POST',
			headers: {
				'Content-Type': 'application/json',
				Authorization: `Bearer ${token}`
			},
			body: JSON.stringify({
				...payload
			})
		});

		if (!response.ok) {
			// Backend reports failures as a JSON body carrying a `detail` field.
			throw await response.json();
		}

		config = await response.json();
	} catch (err) {
		console.log(err);
		error = err.detail;
	}

	if (error) {
		throw error;
	}

	return config;
};
export const transcribeAudio = async (token: string, file: File) => { export const transcribeAudio = async (token: string, file: File) => {
const data = new FormData(); const data = new FormData();
data.append('file', file); data.append('file', file);
@@ -48,11 +110,17 @@ export const synthesizeOpenAISpeech = async (
input: text, input: text,
voice: speaker voice: speaker
}) })
}).catch((err) => { })
console.log(err); .then(async (res) => {
error = err; if (!res.ok) throw await res.json();
return null; return res;
}); })
.catch((err) => {
error = err.detail;
console.log(err);
return null;
});
if (error) { if (error) {
throw error; throw error;

View file

@@ -176,10 +176,12 @@
const toggleSpeakMessage = async () => { const toggleSpeakMessage = async () => {
if (speaking) { if (speaking) {
speechSynthesis.cancel(); try {
speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause(); sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0; sentencesAudio[speakingIdx].currentTime = 0;
} catch {}
speaking = null; speaking = null;
speakingIdx = null; speakingIdx = null;
@@ -221,6 +223,10 @@
sentence sentence
).catch((error) => { ).catch((error) => {
toast.error(error); toast.error(error);
speaking = null;
loadingSpeech = false;
return null; return null;
}); });
@@ -230,7 +236,6 @@
const audio = new Audio(blobUrl); const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio; sentencesAudio[idx] = audio;
loadingSpeech = false; loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
} }
} }

View file

@@ -1,4 +1,5 @@
<script lang="ts"> <script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { createEventDispatcher, onMount, getContext } from 'svelte'; import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
@@ -9,6 +10,9 @@
// Audio // Audio
let OpenAIUrl = '';
let OpenAIKey = '';
let STTEngines = ['', 'openai']; let STTEngines = ['', 'openai'];
let STTEngine = ''; let STTEngine = '';
@@ -69,6 +73,18 @@
saveSettings({ speechAutoSend: speechAutoSend }); saveSettings({ speechAutoSend: speechAutoSend });
}; };
// Push the OpenAI TTS endpoint settings to the backend, then mirror the
// values the server actually stored back into the bound form fields.
const updateConfigHandler = async () => {
	const updated = await updateAudioConfig(localStorage.token, {
		url: OpenAIUrl,
		key: OpenAIKey
	});

	if (updated) {
		OpenAIUrl = updated.OPENAI_API_BASE_URL;
		OpenAIKey = updated.OPENAI_API_KEY;
	}
};
onMount(async () => { onMount(async () => {
let settings = JSON.parse(localStorage.getItem('settings') ?? '{}'); let settings = JSON.parse(localStorage.getItem('settings') ?? '{}');
@@ -85,12 +101,20 @@
} else { } else {
getWebAPIVoices(); getWebAPIVoices();
} }
const res = await getAudioConfig(localStorage.token);
if (res) {
OpenAIUrl = res.OPENAI_API_BASE_URL;
OpenAIKey = res.OPENAI_API_KEY;
}
}); });
</script> </script>
<form <form
class="flex flex-col h-full justify-between space-y-3 text-sm" class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => { on:submit|preventDefault={async () => {
await updateConfigHandler();
saveSettings({ saveSettings({
audio: { audio: {
STTEngine: STTEngine !== '' ? STTEngine : undefined, STTEngine: STTEngine !== '' ? STTEngine : undefined,
@@ -101,7 +125,7 @@
dispatch('save'); dispatch('save');
}} }}
> >
<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80"> <div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-[22rem]">
<div> <div>
<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div> <div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>
@@ -196,6 +220,24 @@
</div> </div>
</div> </div>
{#if TTSEngine === 'openai'}
<div class="mt-1 flex gap-2 mb-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')}
bind:value={OpenAIUrl}
required
/>
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')}
bind:value={OpenAIKey}
required
/>
</div>
{/if}
<div class=" py-0.5 flex w-full justify-between"> <div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div> <div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
@@ -241,16 +283,18 @@
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div> <div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<select <input
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none" list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={speaker} bind:value={speaker}
placeholder="Select a voice" placeholder="Select a voice"
> />
<datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option <option value={voice.name} />
>
{/each} {/each}
</select> </datalist>
</div> </div>
</div> </div>
</div> </div>

View file

@@ -29,8 +29,8 @@
let embeddingEngine = ''; let embeddingEngine = '';
let embeddingModel = ''; let embeddingModel = '';
let openAIKey = ''; let OpenAIKey = '';
let openAIUrl = ''; let OpenAIUrl = '';
let chunkSize = 0; let chunkSize = 0;
let chunkOverlap = 0; let chunkOverlap = 0;
@@ -79,7 +79,7 @@
return; return;
} }
if ((embeddingEngine === 'openai' && openAIKey === '') || openAIUrl === '') { if ((embeddingEngine === 'openai' && OpenAIKey === '') || OpenAIUrl === '') {
toast.error($i18n.t('OpenAI URL/Key required.')); toast.error($i18n.t('OpenAI URL/Key required.'));
return; return;
} }
@@ -93,8 +93,8 @@
...(embeddingEngine === 'openai' ...(embeddingEngine === 'openai'
? { ? {
openai_config: { openai_config: {
key: openAIKey, key: OpenAIKey,
url: openAIUrl url: OpenAIUrl
} }
} }
: {}) : {})
@@ -133,8 +133,8 @@
embeddingEngine = embeddingConfig.embedding_engine; embeddingEngine = embeddingConfig.embedding_engine;
embeddingModel = embeddingConfig.embedding_model; embeddingModel = embeddingConfig.embedding_model;
openAIKey = embeddingConfig.openai_config.key; OpenAIKey = embeddingConfig.openai_config.key;
openAIUrl = embeddingConfig.openai_config.url; OpenAIUrl = embeddingConfig.openai_config.url;
} }
}; };
@@ -192,14 +192,14 @@
<input <input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')} placeholder={$i18n.t('API Base URL')}
bind:value={openAIUrl} bind:value={OpenAIUrl}
required required
/> />
<input <input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')} placeholder={$i18n.t('API Key')}
bind:value={openAIKey} bind:value={OpenAIKey}
required required
/> />
</div> </div>