From 401799c6fa1cc374f55bcc6bcc099c1db7ed8f80 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 10 Feb 2024 19:20:56 -0800 Subject: [PATCH 1/3] feat: tts optimisation --- .../chat/Messages/ResponseMessage.svelte | 78 ++++++++++++------- src/lib/utils/index.ts | 17 ++++ 2 files changed, 69 insertions(+), 26 deletions(-) diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index ba41ef62..5ab41fcd 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -15,6 +15,7 @@ import CodeBlock from './CodeBlock.svelte'; import { synthesizeOpenAISpeech } from '$lib/apis/openai'; + import { extractSentences } from '$lib/utils'; export let modelfiles = []; export let message; @@ -35,8 +36,10 @@ let tooltipInstance = null; - let audioMap = {}; + let sentencesAudio = {}; let speaking = null; + let speakingIdx = null; + let loadingSpeech = false; $: tokens = marked.lexer(message.content); @@ -116,44 +119,67 @@ } }; + const playAudio = (idx) => { + return new Promise((res) => { + speakingIdx = idx; + const audio = sentencesAudio[idx]; + audio.play(); + audio.onended = async (e) => { + await new Promise((r) => setTimeout(r, 500)); + + if (Object.keys(sentencesAudio).length - 1 === idx) { + speaking = null; + } + + res(e); + }; + }); + }; + const toggleSpeakMessage = async () => { if (speaking) { speechSynthesis.cancel(); - speaking = null; - audioMap[message.id].pause(); - audioMap[message.id].currentTime = 0; + sentencesAudio[speakingIdx].pause(); + sentencesAudio[speakingIdx].currentTime = 0; + + speaking = null; + speakingIdx = null; } else { speaking = true; if ($settings?.speech?.engine === 'openai') { loadingSpeech = true; - const res = await synthesizeOpenAISpeech( - localStorage.token, - $settings?.speech?.speaker, - message.content - ).catch((error) => { - toast.error(error); - return null; - }); - if (res) { - const blob = await res.blob(); - const blobUrl = URL.createObjectURL(blob); - console.log(blobUrl); + const sentences = extractSentences(message.content); + console.log(sentences); - loadingSpeech = false; + sentencesAudio = sentences.reduce((a, e, i, arr) => { + a[i] = null; + return a; + }, {}); - const audio = new Audio(blobUrl); - audioMap[message.id] = audio; + let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately - audio.onended = () => { - speaking = null; - if ($settings.conversationMode) { - document.getElementById('voice-input-button')?.click(); - } - }; - audio.play().catch((e) => console.error('Error playing audio:', e)); + for (const [idx, sentence] of sentences.entries()) { + const res = await synthesizeOpenAISpeech( + localStorage.token, + $settings?.speech?.speaker, + sentence + ).catch((error) => { + toast.error(error); + return null; + }); + + if (res) { + const blob = await res.blob(); + const blobUrl = URL.createObjectURL(blob); + const audio = new Audio(blobUrl); + sentencesAudio[idx] = audio; + loadingSpeech = false; + + lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx)); + } } } else { let voices = []; diff --git a/src/lib/utils/index.ts b/src/lib/utils/index.ts index 16bf1cd5..fa0963ea 100644 --- a/src/lib/utils/index.ts +++ b/src/lib/utils/index.ts @@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => { return url.protocol === 'http:' || url.protocol === 'https:'; }; + +export const removeEmojis = (str) => { + // Regular expression to match emojis + const emojiRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDE4F]/g; + + // Replace emojis with an empty string + return str.replace(emojiRegex, ''); +}; + +export const extractSentences = (text) => { + // Split the paragraph into sentences based on common punctuation marks + const sentences = text.split(/(?<=[.!?])/); + + return sentences + .map((sentence) => removeEmojis(sentence.trim())) + .filter((sentence) => sentence !== ''); +}; From f01428f502a109e91ccab9e3f93d4e419e5d12b5 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 10 Feb 2024 19:29:13 -0800 Subject: [PATCH 2/3] fix: sleep duration --- src/lib/components/chat/Messages/ResponseMessage.svelte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 5ab41fcd..7ea44950 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -125,7 +125,7 @@ const audio = sentencesAudio[idx]; audio.play(); audio.onended = async (e) => { - await new Promise((r) => setTimeout(r, 500)); + await new Promise((r) => setTimeout(r, 300)); if (Object.keys(sentencesAudio).length - 1 === idx) { speaking = null; From 66f86062b81bcfbf831ec084a93577cb7fb19c25 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Sat, 10 Feb 2024 19:33:29 -0800 Subject: [PATCH 3/3] feat: merge one word sentence --- .../chat/Messages/ResponseMessage.svelte | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 7ea44950..06229769 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -151,7 +151,22 @@ if ($settings?.speech?.engine === 'openai') { loadingSpeech = true; - const sentences = extractSentences(message.content); + const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => { + const lastIndex = mergedTexts.length - 1; + if (lastIndex >= 0) { + const previousText = mergedTexts[lastIndex]; + const wordCount = previousText.split(/\s+/).length; + if (wordCount < 2) { + mergedTexts[lastIndex] = previousText + ' ' + currentText; + } else { + mergedTexts.push(currentText); + } + } else { + mergedTexts.push(currentText); + } + return mergedTexts; + }, []); + console.log(sentences); sentencesAudio = sentences.reduce((a, e, i, arr) => {