Merge pull request #704 from ollama-webui/tts

feat: tts optimisation
2024-02-10 19:38:07 -08:00 · 2024-02-10 19:38:07 -08:00 · 182ab8b8a2
commit 182ab8b8a2
parent 68ed24b7d7 66f86062b8
2 changed files with 86 additions and 28 deletions
--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@ -15,6 +15,7 @@
 	import CodeBlock from './CodeBlock.svelte';

 	import { synthesizeOpenAISpeech } from '$lib/apis/openai';
+	import { extractSentences } from '$lib/utils';

 	export let modelfiles = [];
 	export let message;
@ -35,8 +36,10 @@

 	let tooltipInstance = null;

-	let audioMap = {};
+	let sentencesAudio = {};
 	let speaking = null;
+	let speakingIdx = null;
+
 	let loadingSpeech = false;

 	$: tokens = marked.lexer(message.content);
@ -116,22 +119,68 @@
 		}
 	};

+	/**
+	 * Plays the audio clip stored at `sentencesAudio[idx]` and records it as the clip
+	 * currently being spoken.
+	 *
+	 * @param {number} idx - Key of the clip in `sentencesAudio`.
+	 * @returns {Promise<Event>} Resolves with the `ended` event about 300 ms after the clip
+	 * finishes. The promise stays pending if the clip never fires `ended`.
+	 */
+	const playAudio = (idx) => {
+		return new Promise((res) => {
+			speakingIdx = idx;
+			const audio = sentencesAudio[idx];
+
+			// Register the handler before playback starts.
+			audio.onended = async (e) => {
+				// Leave a short gap before the promise resolves.
+				await new Promise((r) => setTimeout(r, 300));
+
+				// idx is the final entry (keys are expected to be 0..n-1): speaking is over.
+				if (Object.keys(sentencesAudio).length - 1 === idx) {
+					speaking = null;
+				}
+
+				res(e);
+			};
+
+			// play() returns a promise that rejects when playback cannot start. Log the failure
+			// instead of leaving an unhandled rejection; the returned promise is deliberately
+			// not resolved here so a failed or aborted clip does not advance to the next one.
+			audio.play()?.catch((error) => console.error('Error playing audio:', error));
+		});
+	};
+
 	const toggleSpeakMessage = async () => {
 		if (speaking) {
 			speechSynthesis.cancel();
-			speaking = null;

-			audioMap[message.id].pause();
-			audioMap[message.id].currentTime = 0;
+			sentencesAudio[speakingIdx].pause();
+			sentencesAudio[speakingIdx].currentTime = 0;
+
+			speaking = null;
+			speakingIdx = null;
 		} else {
 			speaking = true;

 			if ($settings?.speech?.engine === 'openai') {
 				loadingSpeech = true;
+
+				const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
+					const lastIndex = mergedTexts.length - 1;
+					if (lastIndex >= 0) {
+						const previousText = mergedTexts[lastIndex];
+						const wordCount = previousText.split(/\s+/).length;
+						if (wordCount < 2) {
+							mergedTexts[lastIndex] = previousText + ' ' + currentText;
+						} else {
+							mergedTexts.push(currentText);
+						}
+					} else {
+						mergedTexts.push(currentText);
+					}
+					return mergedTexts;
+				}, []);
+
+				console.log(sentences);
+
+				sentencesAudio = sentences.reduce((a, e, i, arr) => {
+					a[i] = null;
+					return a;
+				}, {});
+
+				let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
+
+				for (const [idx, sentence] of sentences.entries()) {
 					const res = await synthesizeOpenAISpeech(
 						localStorage.token,
 						$settings?.speech?.speaker,
-					message.content
+						sentence
 					).catch((error) => {
 						toast.error(error);
 						return null;
@ -140,20 +189,12 @@
 					if (res) {
 						const blob = await res.blob();
 						const blobUrl = URL.createObjectURL(blob);
-					console.log(blobUrl);
-
+						const audio = new Audio(blobUrl);
+						sentencesAudio[idx] = audio;
 						loadingSpeech = false;

-					const audio = new Audio(blobUrl);
-					audioMap[message.id] = audio;
-
-					audio.onended = () => {
-						speaking = null;
-						if ($settings.conversationMode) {
-							document.getElementById('voice-input-button')?.click();
+						lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
 					}
-					};
-					audio.play().catch((e) => console.error('Error playing audio:', e));
 				}
 			} else {
 				let voices = [];
--- a/src/lib/utils/index.ts
+++ b/src/lib/utils/index.ts
@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {

 	return url.protocol === 'http:' || url.protocol === 'https:';
 };
+
+/**
+ * Removes emoji from a string and leaves every other character untouched.
+ *
+ * @param {string} str - Input text.
+ * @returns {string} `str` with emoji sequences removed.
+ */
+export const removeEmojis = (str) => {
+	// Matches one emoji sequence:
+	//  - a character with default emoji presentation, or a pictographic character that is
+	//    followed by the emoji variation selector (U+FE0F);
+	//  - then any skin-tone modifiers, variation selectors or tag characters;
+	//  - then any zero-width-joiner (U+200D) continuations.
+	// Unicode property escapes are used rather than matching every surrogate pair, which would
+	// also delete non-emoji astral characters and miss emoji in the Basic Multilingual Plane.
+	const emojiRegex =
+		/(?:\p{Emoji_Presentation}|\p{Extended_Pictographic}\uFE0F)[\p{Emoji_Modifier}\uFE0F\u{E0020}-\u{E007F}]*(?:\u200D\p{Extended_Pictographic}[\p{Emoji_Modifier}\uFE0F]*)*/gu;
+
+	// Replace emojis with an empty string
+	return str.replace(emojiRegex, '');
+};
+
+/**
+ * Splits text into sentences, strips emojis from each one and drops segments that end up
+ * empty.
+ *
+ * @param {string} text - Text to split.
+ * @returns {string[]} Trimmed, non-empty sentences in their original order.
+ */
+export const extractSentences = (text) => {
+	// Split right after each '.', '!' or '?'; the lookbehind keeps the punctuation mark
+	// attached to the sentence it ends
+	const sentences = text.split(/(?<=[.!?])/);
+
+	return sentences
+		// Trim after removing emojis so whitespace around a removed emoji is not left behind
+		// and emoji-only segments collapse to '' and are filtered out below
+		.map((sentence) => removeEmojis(sentence).trim())
+		.filter((sentence) => sentence !== '');
+};