feat: openai tts support

This commit is contained in:
Timothy J. Baek 2024-02-05 22:51:08 -08:00
parent ce31113abd
commit 0b8df52c97
5 changed files with 216 additions and 23 deletions

View file

@ -229,3 +229,34 @@ export const generateOpenAIChatCompletion = async (token: string = '', body: obj
return res;
};
/**
 * Requests synthesized speech for `text` from the `/audio/speech` endpoint.
 *
 * @param token - Bearer token sent in the `Authorization` header.
 * @param speaker - Voice name forwarded as the `voice` field (defaults to `'alloy'`).
 * @param text - Text to synthesize, forwarded as the `input` field.
 * @returns The successful `fetch` response; the audio is read from its body
 *   (for example with `res.blob()`).
 * @throws The original `fetch` rejection when the request cannot be made, or a
 *   message string when the server answers with a non-2xx status.
 */
export const synthesizeOpenAISpeech = async (
	token: string = '',
	speaker: string = 'alloy',
	text: string = ''
) => {
	const res = await fetch(`${OPENAI_API_BASE_URL}/audio/speech`, {
		method: 'POST',
		headers: {
			Authorization: `Bearer ${token}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify({
			model: 'tts-1',
			input: text,
			voice: speaker
		})
	}).catch((err: unknown) => {
		// Network-level failure (offline, DNS, CORS, ...): log it and let the caller handle it.
		console.log(err);
		throw err;
	});

	if (!res.ok) {
		// `fetch` resolves for HTTP error statuses too. Without this check an error
		// payload would be handed back as if it were audio data.
		let message = `Speech synthesis failed (HTTP ${res.status})`;

		// The error body may not be JSON at all, so a parse failure falls back to the
		// generic message. Both `{ detail }` and `{ error: { message } }` shapes are accepted.
		const payload = (await res.json().catch(() => null)) as {
			detail?: unknown;
			error?: { message?: unknown };
		} | null;
		const detail = payload?.detail ?? payload?.error?.message;
		if (typeof detail === 'string' && detail !== '') {
			message = detail;
		}

		// A plain string is thrown on purpose: the caller passes the caught value
		// straight to a toast, which needs displayable text.
		throw message;
	}

	return res;
};

View file

@ -1,4 +1,5 @@
<script lang="ts">
import toast from 'svelte-french-toast';
import dayjs from 'dayjs';
import { marked } from 'marked';
import { settings } from '$lib/stores';
@ -13,6 +14,8 @@
import Skeleton from './Skeleton.svelte';
import CodeBlock from './CodeBlock.svelte';
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
export let modelfiles = [];
export let message;
export let siblings;
@ -27,6 +30,8 @@
export let copyToClipboard: Function;
export let regenerateResponse: Function;
let audioMap = {};
let edit = false;
let editedContent = '';
@ -114,22 +119,55 @@
if (speaking) {
speechSynthesis.cancel();
speaking = null;
audioMap[message.id].pause();
audioMap[message.id].currentTime = 0;
} else {
speaking = true;
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
if ($settings?.speech?.engine === 'openai') {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
message.content
).catch((error) => {
toast.error(error);
return null;
});
const voice = voices?.filter((v) => v.name === $settings?.speaker)?.at(0) ?? undefined;
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
console.log(blobUrl);
const speak = new SpeechSynthesisUtterance(message.content);
speak.voice = voice;
speechSynthesis.speak(speak);
const audio = new Audio(blobUrl);
audioMap[message.id] = audio;
audio.onended = () => {
speaking = null;
};
audio.play().catch((e) => console.error('Error playing audio:', e));
}
}, 100);
} else {
let voices = [];
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
if (voices.length > 0) {
clearInterval(getVoicesLoop);
const voice =
voices?.filter((v) => v.name === $settings?.speech?.speaker)?.at(0) ?? undefined;
const speak = new SpeechSynthesisUtterance(message.content);
speak.onend = () => {
speaking = null;
};
speak.voice = voice;
speechSynthesis.speak(speak);
}
}, 100);
}
}
};

View file

@ -6,16 +6,23 @@
// Voice
// Known engine identifiers: '' pairs with the "Default (Web API)" option of the
// engine <select>; 'openai' switches the voice list to the OpenAI voices.
let engines = ['', 'openai'];
// NOTE(review): appears to overlap with `engine` below — confirm which of the two
// the template is meant to bind to before removing either.
let selectedEngine = '';
// Selected engine; restored from settings.speech.engine on mount and written back on save.
let engine = '';
// Entries for the voice picker (each exposes a `name`); filled by
// getOpenAIVoices() or getWebAPIVoices().
let voices = [];
// Name of the chosen voice; '' means none chosen and is saved as `undefined`.
let speaker = '';
onMount(async () => {
let settings = JSON.parse(localStorage.getItem('settings') ?? '{}');
speaker = settings.speaker ?? '';
// Replaces the voice list with the fixed set of voice names used for the OpenAI engine.
const getOpenAIVoices = () => {
	const voiceNames = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'];
	// The picker reads `voice.name`, so each name is wrapped in an object.
	voices = voiceNames.map((name) => ({ name }));
};
const getWebAPIVoices = () => {
const getVoicesLoop = setInterval(async () => {
voices = await speechSynthesis.getVoices();
@ -24,6 +31,19 @@
clearInterval(getVoicesLoop);
}
}, 100);
};
// On mount, restore the saved speech engine and speaker from localStorage,
// then load the voice list that matches the restored engine.
onMount(async () => {
	const saved = JSON.parse(localStorage.getItem('settings') ?? '{}');
	const speech = saved?.speech;

	engine = speech?.engine ?? '';
	speaker = speech?.speaker ?? '';

	// Any engine other than 'openai' falls back to the browser's Web Speech voices.
	const loadVoices = engine === 'openai' ? getOpenAIVoices : getWebAPIVoices;
	loadVoices();
});
</script>
@ -31,7 +51,10 @@
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => {
saveSettings({
speaker: speaker !== '' ? speaker : undefined
speech: {
engine: engine !== '' ? engine : undefined,
speaker: speaker !== '' ? speaker : undefined
}
});
dispatch('save');
}}
@ -42,10 +65,16 @@
<div class="flex items-center relative">
<select
class="w-fit pr-8 rounded py-2 px-2 text-xs bg-transparent outline-none text-right"
bind:value={selectedEngine}
bind:value={engine}
placeholder="Select a mode"
on:change={(e) => {
console.log(e);
if (e.target.value === 'openai') {
getOpenAIVoices();
speaker = 'alloy';
} else {
getWebAPIVoices();
speaker = '';
}
}}
>
<option value="">Default (Web API)</option>
@ -56,7 +85,7 @@
<hr class=" dark:border-gray-700" />
{#if selectedEngine === ''}
{#if engine === ''}
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">
@ -75,6 +104,24 @@
</div>
</div>
</div>
{:else if engine === 'openai'}
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">
<div class="flex-1">
<select
class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
bind:value={speaker}
placeholder="Select a voice"
>
{#each voices as voice}
<option value={voice.name} class="bg-gray-100 dark:bg-gray-700">{voice.name}</option
>
{/each}
</select>
</div>
</div>
</div>
{/if}
</div>