feat: external openai tts support

This commit is contained in:
Timothy J. Baek 2024-04-20 16:00:24 -05:00
parent 713934edb6
commit cbd18ec63c
5 changed files with 187 additions and 74 deletions

View file

@ -101,8 +101,6 @@ async def update_openai_config(
@app.post("/speech") @app.post("/speech")
async def speech(request: Request, user=Depends(get_verified_user)): async def speech(request: Request, user=Depends(get_verified_user)):
idx = None
try:
body = await request.body() body = await request.body()
name = hashlib.sha256(body).hexdigest() name = hashlib.sha256(body).hexdigest()
@ -146,17 +144,15 @@ async def speech(request: Request, user=Depends(get_verified_user)):
try: try:
res = r.json() res = r.json()
if "error" in res: if "error" in res:
error_detail = f"External: {res['error']}" error_detail = f"External: {res['error']['message']}"
except: except:
error_detail = f"External: {e}" error_detail = f"External: {e}"
raise HTTPException( raise HTTPException(
status_code=r.status_code if r else 500, detail=error_detail status_code=r.status_code if r != None else 500,
detail=error_detail,
) )
except ValueError:
raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
@app.post("/transcriptions") @app.post("/transcriptions")
def transcribe( def transcribe(

View file

@ -1,5 +1,67 @@
import { AUDIO_API_BASE_URL } from '$lib/constants'; import { AUDIO_API_BASE_URL } from '$lib/constants';
/**
 * Fetches the audio configuration with a GET request to
 * `${AUDIO_API_BASE_URL}/config`, authenticated with a bearer token.
 *
 * @param token - Value sent in the `Authorization: Bearer …` header.
 * @returns The parsed JSON body of a successful response, or `null` when the
 *   request failed and the failure carried no truthy `detail` (for example a
 *   rejected `fetch` or an unparsable body).
 * @throws The `detail` value of the failure when it is truthy. For a non-OK
 *   response this is the `detail` field of the parsed error body, thrown as-is
 *   (it is not wrapped in an `Error`).
 */
export const getAudioConfig = async (token: string) => {
	// Issued outside the try block on purpose: anything thrown synchronously
	// while building or starting the request must reach the caller untouched
	// instead of being routed through the failure handling below.
	const pending = fetch(`${AUDIO_API_BASE_URL}/config`, {
		method: 'GET',
		headers: {
			'Content-Type': 'application/json',
			Authorization: `Bearer ${token}`
		}
	});

	try {
		const response = await pending;
		const payload = await response.json();

		// A non-OK response is handled as a failure whose value is its parsed body.
		if (!response.ok) throw payload;

		return payload;
	} catch (err) {
		console.log(err);

		const detail = (err as { detail?: unknown }).detail;
		if (detail) throw detail;

		// Failures without a usable `detail` are reported as "no result".
		return null;
	}
};
/**
 * Payload accepted by `updateAudioConfig`; its fields are spread into the JSON
 * body of the POST to `/config/update`.
 */
type OpenAIConfigForm = {
// Filled from the settings input labelled "API Base URL".
url: string;
// Filled from the settings input labelled "API Key".
key: string;
};
/**
 * Sends new audio configuration values with a POST request to
 * `${AUDIO_API_BASE_URL}/config/update`, authenticated with a bearer token.
 *
 * @param token - Value sent in the `Authorization: Bearer …` header.
 * @param payload - URL/key pair; a shallow copy of it is serialized as the JSON
 *   request body.
 * @returns The parsed JSON body of a successful response, or `null` when the
 *   request failed and the failure carried no truthy `detail` (for example a
 *   rejected `fetch` or an unparsable body).
 * @throws The `detail` value of the failure when it is truthy. For a non-OK
 *   response this is the `detail` field of the parsed error body, thrown as-is
 *   (it is not wrapped in an `Error`).
 */
export const updateAudioConfig = async (token: string, payload: OpenAIConfigForm) => {
	// Serialization and request start stay outside the try block on purpose:
	// anything thrown synchronously here must reach the caller untouched
	// instead of being routed through the failure handling below.
	const pending = fetch(`${AUDIO_API_BASE_URL}/config/update`, {
		method: 'POST',
		headers: {
			'Content-Type': 'application/json',
			Authorization: `Bearer ${token}`
		},
		body: JSON.stringify({ ...payload })
	});

	try {
		const response = await pending;
		const parsed = await response.json();

		// A non-OK response is handled as a failure whose value is its parsed body.
		if (!response.ok) throw parsed;

		return parsed;
	} catch (err) {
		console.log(err);

		const detail = (err as { detail?: unknown }).detail;
		if (detail) throw detail;

		// Failures without a usable `detail` are reported as "no result".
		return null;
	}
};
export const transcribeAudio = async (token: string, file: File) => { export const transcribeAudio = async (token: string, file: File) => {
const data = new FormData(); const data = new FormData();
data.append('file', file); data.append('file', file);
@ -48,9 +110,15 @@ export const synthesizeOpenAISpeech = async (
input: text, input: text,
voice: speaker voice: speaker
}) })
}).catch((err) => { })
.then(async (res) => {
if (!res.ok) throw await res.json();
return res;
})
.catch((err) => {
error = err.detail;
console.log(err); console.log(err);
error = err;
return null; return null;
}); });

View file

@ -176,10 +176,12 @@
const toggleSpeakMessage = async () => { const toggleSpeakMessage = async () => {
if (speaking) { if (speaking) {
try {
speechSynthesis.cancel(); speechSynthesis.cancel();
sentencesAudio[speakingIdx].pause(); sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0; sentencesAudio[speakingIdx].currentTime = 0;
} catch {}
speaking = null; speaking = null;
speakingIdx = null; speakingIdx = null;
@ -221,6 +223,10 @@
sentence sentence
).catch((error) => { ).catch((error) => {
toast.error(error); toast.error(error);
speaking = null;
loadingSpeech = false;
return null; return null;
}); });
@ -230,7 +236,6 @@
const audio = new Audio(blobUrl); const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio; sentencesAudio[idx] = audio;
loadingSpeech = false; loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
} }
} }

View file

@ -1,4 +1,5 @@
<script lang="ts"> <script lang="ts">
import { getAudioConfig, updateAudioConfig } from '$lib/apis/audio';
import { createEventDispatcher, onMount, getContext } from 'svelte'; import { createEventDispatcher, onMount, getContext } from 'svelte';
import { toast } from 'svelte-sonner'; import { toast } from 'svelte-sonner';
const dispatch = createEventDispatcher(); const dispatch = createEventDispatcher();
@ -9,6 +10,9 @@
// Audio // Audio
let OpenAIUrl = '';
let OpenAIKey = '';
let STTEngines = ['', 'openai']; let STTEngines = ['', 'openai'];
let STTEngine = ''; let STTEngine = '';
@ -69,6 +73,18 @@
saveSettings({ speechAutoSend: speechAutoSend }); saveSettings({ speechAutoSend: speechAutoSend });
}; };
// Submits the OpenAI URL/key currently held in the form state through
// `updateAudioConfig`, then mirrors the values from the response back into
// that state. A falsy result leaves the state untouched; a rejection from
// `updateAudioConfig` is not caught here and propagates to the caller.
const updateConfigHandler = async () => {
	const saved = await updateAudioConfig(localStorage.token, {
		url: OpenAIUrl,
		key: OpenAIKey
	});

	if (!saved) return;

	OpenAIUrl = saved.OPENAI_API_BASE_URL;
	OpenAIKey = saved.OPENAI_API_KEY;
};
onMount(async () => { onMount(async () => {
let settings = JSON.parse(localStorage.getItem('settings') ?? '{}'); let settings = JSON.parse(localStorage.getItem('settings') ?? '{}');
@ -85,12 +101,20 @@
} else { } else {
getWebAPIVoices(); getWebAPIVoices();
} }
const res = await getAudioConfig(localStorage.token);
if (res) {
OpenAIUrl = res.OPENAI_API_BASE_URL;
OpenAIKey = res.OPENAI_API_KEY;
}
}); });
</script> </script>
<form <form
class="flex flex-col h-full justify-between space-y-3 text-sm" class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => { on:submit|preventDefault={async () => {
await updateConfigHandler();
saveSettings({ saveSettings({
audio: { audio: {
STTEngine: STTEngine !== '' ? STTEngine : undefined, STTEngine: STTEngine !== '' ? STTEngine : undefined,
@ -101,7 +125,7 @@
dispatch('save'); dispatch('save');
}} }}
> >
<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80"> <div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-[22rem]">
<div> <div>
<div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div> <div class=" mb-1 text-sm font-medium">{$i18n.t('STT Settings')}</div>
@ -196,6 +220,24 @@
</div> </div>
</div> </div>
{#if TTSEngine === 'openai'}
<div class="mt-1 flex gap-2 mb-1">
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')}
bind:value={OpenAIUrl}
required
/>
<input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')}
bind:value={OpenAIKey}
required
/>
</div>
{/if}
<div class=" py-0.5 flex w-full justify-between"> <div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div> <div class=" self-center text-xs font-medium">{$i18n.t('Auto-playback response')}</div>
@ -241,16 +283,18 @@
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div> <div class=" mb-2.5 text-sm font-medium">{$i18n.t('Set Voice')}</div>
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1"> <div class="flex-1">
<select <input
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none" list="voice-list"
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
bind:value={speaker} bind:value={speaker}
placeholder="Select a voice" placeholder="Select a voice"
> />
<datalist id="voice-list">
{#each voices as voice} {#each voices as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option <option value={voice.name} />
>
{/each} {/each}
</select> </datalist>
</div> </div>
</div> </div>
</div> </div>

View file

@ -29,8 +29,8 @@
let embeddingEngine = ''; let embeddingEngine = '';
let embeddingModel = ''; let embeddingModel = '';
let openAIKey = ''; let OpenAIKey = '';
let openAIUrl = ''; let OpenAIUrl = '';
let chunkSize = 0; let chunkSize = 0;
let chunkOverlap = 0; let chunkOverlap = 0;
@ -79,7 +79,7 @@
return; return;
} }
if ((embeddingEngine === 'openai' && openAIKey === '') || openAIUrl === '') { if ((embeddingEngine === 'openai' && OpenAIKey === '') || OpenAIUrl === '') {
toast.error($i18n.t('OpenAI URL/Key required.')); toast.error($i18n.t('OpenAI URL/Key required.'));
return; return;
} }
@ -93,8 +93,8 @@
...(embeddingEngine === 'openai' ...(embeddingEngine === 'openai'
? { ? {
openai_config: { openai_config: {
key: openAIKey, key: OpenAIKey,
url: openAIUrl url: OpenAIUrl
} }
} }
: {}) : {})
@ -133,8 +133,8 @@
embeddingEngine = embeddingConfig.embedding_engine; embeddingEngine = embeddingConfig.embedding_engine;
embeddingModel = embeddingConfig.embedding_model; embeddingModel = embeddingConfig.embedding_model;
openAIKey = embeddingConfig.openai_config.key; OpenAIKey = embeddingConfig.openai_config.key;
openAIUrl = embeddingConfig.openai_config.url; OpenAIUrl = embeddingConfig.openai_config.url;
} }
}; };
@ -192,14 +192,14 @@
<input <input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Base URL')} placeholder={$i18n.t('API Base URL')}
bind:value={openAIUrl} bind:value={OpenAIUrl}
required required
/> />
<input <input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('API Key')} placeholder={$i18n.t('API Key')}
bind:value={openAIKey} bind:value={OpenAIKey}
required required
/> />
</div> </div>