feat: whisper voice input

This commit is contained in:
Timothy J. Baek 2024-02-11 02:12:49 -08:00
parent c6c69924d1
commit f245c62692
3 changed files with 219 additions and 76 deletions

View file

@ -1,17 +1,21 @@
<script lang="ts">
import { createEventDispatcher, onMount } from 'svelte';
import toast from 'svelte-french-toast';
// Event dispatcher for this component; the form's submit handler uses it to emit 'save'.
const dispatch = createEventDispatcher();
// Callback prop supplied by the parent; the submit handler calls it with the settings object to persist.
export let saveSettings: Function;
// Voice
// Speech-to-text engine ids and the current selection ('' is labelled "Default (Web API)" in the select).
// NOTE(review): the STT select also offers 'whisper-local', which is not listed in this array;
// confirm whether the array is meant to mirror the available options.
let STTEngines = ['', 'openai'];
let STTEngine = '';
let conversationMode = false;
// Loaded from settings.speechAutoSend / settings.responseAutoPlayback, defaulting to false.
let speechAutoSend = false;
let responseAutoPlayback = false;
// NOTE(review): `engines`/`engine` look like the pre-rename names of `TTSEngines`/`TTSEngine`.
// This view is a diff with the +/- markers stripped, so these two lines are presumably the
// removed side of the change; confirm against the real file.
let engines = ['', 'openai'];
let engine = '';
// Text-to-speech engine ids and the current selection ('' is labelled "Default (Web API)" in the select).
let TTSEngines = ['', 'openai'];
let TTSEngine = '';
// Voice list; presumably filled by getOpenAIVoices / getWebAPIVoices, which are not visible here.
let voices = [];
// Selected voice; the TTS engine select resets it to 'alloy' for 'openai' and to '' otherwise.
let speaker = '';
@ -70,10 +74,11 @@
speechAutoSend = settings.speechAutoSend ?? false;
responseAutoPlayback = settings.responseAutoPlayback ?? false;
engine = settings?.speech?.engine ?? '';
speaker = settings?.speech?.speaker ?? '';
STTEngine = settings?.voice?.STTEngine ?? '';
TTSEngine = settings?.voice?.TTSEngine ?? '';
speaker = settings?.voice?.speaker ?? '';
if (engine === 'openai') {
if (TTSEngine === 'openai') {
getOpenAIVoices();
} else {
getWebAPIVoices();
@ -85,37 +90,37 @@
class="flex flex-col h-full justify-between space-y-3 text-sm"
on:submit|preventDefault={() => {
// Hands the chosen voice settings to the `saveSettings` prop, then emits a 'save' event.
// A selection equal to '' is sent as `undefined` rather than as an empty string.
// NOTE(review): this is a diff rendering with the +/- markers stripped. The `speech` / `engine`
// lines and the `voice` / `STTEngine` / `TTSEngine` lines are presumably the old and new
// versions of the same object, which is why the braces below do not balance as written;
// confirm against the real file before editing.
saveSettings({
speech: {
engine: engine !== '' ? engine : undefined,
voice: {
STTEngine: STTEngine !== '' ? STTEngine : undefined,
TTSEngine: TTSEngine !== '' ? TTSEngine : undefined,
speaker: speaker !== '' ? speaker : undefined
}
});
dispatch('save');
}}
>
<div class=" space-y-3">
<div class=" space-y-3 pr-1.5 overflow-y-scroll max-h-80">
<div>
<div class=" mb-1 text-sm font-medium">TTS Settings</div>
<div class=" mb-1 text-sm font-medium">STT Settings</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Speech Engine</div>
<div class=" self-center text-xs font-medium">Speech-to-Text Engine</div>
<div class="flex items-center relative">
<select
class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={engine}
bind:value={STTEngine}
placeholder="Select a mode"
on:change={(e) => {
// Change handler for the speech-to-text engine select; `e.target.value` is the chosen option value.
// NOTE(review): this is a diff rendering with the +/- markers stripped. The voice-list branch
// directly below (getOpenAIVoices / getWebAPIVoices / speaker reset) and the microphone check
// after it presumably belong to the old and new versions of this handler respectively;
// confirm against the real file before editing.
if (e.target.value === 'openai') {
getOpenAIVoices();
speaker = 'alloy';
} else {
getWebAPIVoices();
speaker = '';
// Any non-default engine (value other than '') asks for microphone access via getUserMedia.
if (e.target.value !== '') {
// NOTE(review): only a rejection handler is attached; the resolved value is never used, so if
// the call yields a MediaStream its tracks are not stopped in this block. Confirm whether the
// microphone is released elsewhere.
navigator.mediaDevices.getUserMedia({ audio: true }).catch(function (err) {
// On rejection: show an error toast and fall back to the default engine ('').
toast.error(`Permission denied when accessing microphone: ${err}`);
STTEngine = '';
});
}
}}
>
<option value="">Default (Web API)</option>
<option value="openai">Open AI</option>
<option value="whisper-local">Whisper (Local)</option>
</select>
</div>
</div>
@ -155,6 +160,33 @@
{/if}
</button>
</div>
</div>
<div>
<div class=" mb-1 text-sm font-medium">TTS Settings</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Text-to-Speech Engine</div>
<div class="flex items-center relative">
<select
class="w-fit pr-8 rounded px-2 p-1 text-xs bg-transparent outline-none text-right"
bind:value={TTSEngine}
placeholder="Select a mode"
on:change={(event) => {
	// Reload the voice list for the newly selected text-to-speech engine and
	// reset the speaker: 'alloy' for the 'openai' engine, '' for anything else.
	const useOpenAI = event.target.value === 'openai';
	if (!useOpenAI) {
		getWebAPIVoices();
		speaker = '';
		return;
	}
	getOpenAIVoices();
	speaker = 'alloy';
}}
>
<option value="">Default (Web API)</option>
<option value="openai">Open AI</option>
</select>
</div>
</div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">Auto-playback response</div>
@ -177,7 +209,7 @@
<hr class=" dark:border-gray-700" />
{#if engine === ''}
{#if TTSEngine === ''}
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">
@ -196,7 +228,7 @@
</div>
</div>
</div>
{:else if engine === 'openai'}
{:else if TTSEngine === 'openai'}
<div>
<div class=" mb-2.5 text-sm font-medium">Set Voice</div>
<div class="flex w-full">