Merge pull request #704 from ollama-webui/tts

feat: tts optimisation
This commit is contained in:
Timothy Jaeryang Baek 2024-02-10 19:38:07 -08:00 committed by GitHub
commit 182ab8b8a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 86 additions and 28 deletions

View file

@ -15,6 +15,7 @@
import CodeBlock from './CodeBlock.svelte'; import CodeBlock from './CodeBlock.svelte';
import { synthesizeOpenAISpeech } from '$lib/apis/openai'; import { synthesizeOpenAISpeech } from '$lib/apis/openai';
import { extractSentences } from '$lib/utils';
export let modelfiles = []; export let modelfiles = [];
export let message; export let message;
@ -35,8 +36,10 @@
let tooltipInstance = null; let tooltipInstance = null;
let audioMap = {}; let sentencesAudio = {};
let speaking = null; let speaking = null;
let speakingIdx = null;
let loadingSpeech = false; let loadingSpeech = false;
$: tokens = marked.lexer(message.content); $: tokens = marked.lexer(message.content);
@ -116,44 +119,82 @@
} }
}; };
/**
 * Play the audio clip stored under `sentencesAudio[idx]`.
 *
 * Records `idx` in `speakingIdx`, starts playback and resolves 300 ms after the
 * clip's `ended` event fires. When `idx` is the last key of `sentencesAudio`,
 * `speaking` is reset to `null` before resolving.
 *
 * The returned promise never rejects: if the clip is missing or playback cannot
 * be started, the problem is handled here and the promise resolves with `null`,
 * so callers that chain clips one after another are not left waiting forever.
 *
 * @param {number} idx - Key of the clip inside `sentencesAudio`.
 * @returns {Promise<Event|null>} The `ended` event, or `null` if nothing was played.
 */
const playAudio = (idx) => {
	return new Promise((res) => {
		const audio = sentencesAudio[idx];

		// Shared exit path: clear the speaking flag once the final clip is done with.
		const finish = (result) => {
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
			}
			res(result);
		};

		// Nothing stored for this index: do not point `speakingIdx` at an entry
		// that cannot be paused later.
		if (!audio) {
			finish(null);
			return;
		}

		speakingIdx = idx;

		// Attach the handler before starting playback so the event cannot be missed.
		audio.onended = async (e) => {
			// Short gap between consecutive clips.
			await new Promise((r) => setTimeout(r, 300));
			finish(e);
		};

		// `play()` returns a promise that rejects when playback cannot start
		// (for example when blocked by the browser); without this handler the
		// rejection would go unhandled and this promise would never settle.
		Promise.resolve(audio.play()).catch((error) => {
			console.error('Error playing audio:', error);
			finish(null);
		});
	});
};
const toggleSpeakMessage = async () => { const toggleSpeakMessage = async () => {
if (speaking) { if (speaking) {
speechSynthesis.cancel(); speechSynthesis.cancel();
speaking = null;
audioMap[message.id].pause(); sentencesAudio[speakingIdx].pause();
audioMap[message.id].currentTime = 0; sentencesAudio[speakingIdx].currentTime = 0;
speaking = null;
speakingIdx = null;
} else { } else {
speaking = true; speaking = true;
if ($settings?.speech?.engine === 'openai') { if ($settings?.speech?.engine === 'openai') {
loadingSpeech = true; loadingSpeech = true;
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
message.content
).catch((error) => {
toast.error(error);
return null;
});
if (res) { const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
const blob = await res.blob(); const lastIndex = mergedTexts.length - 1;
const blobUrl = URL.createObjectURL(blob); if (lastIndex >= 0) {
console.log(blobUrl); const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
loadingSpeech = false; if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
const audio = new Audio(blobUrl); } else {
audioMap[message.id] = audio; mergedTexts.push(currentText);
audio.onended = () => {
speaking = null;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
} }
}; } else {
audio.play().catch((e) => console.error('Error playing audio:', e)); mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
console.log(sentences);
sentencesAudio = sentences.reduce((a, e, i, arr) => {
a[i] = null;
return a;
}, {});
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
sentence
).catch((error) => {
toast.error(error);
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
} }
} else { } else {
let voices = []; let voices = [];

View file

@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:'; return url.protocol === 'http:' || url.protocol === 'https:';
}; };
/**
 * Strip emoji from a string.
 *
 * Matches whole emoji sequences using Unicode properties instead of raw
 * surrogate ranges, so that:
 *  - non-emoji characters outside the Basic Multilingual Plane (for example
 *    rare CJK ideographs) are preserved;
 *  - emoji inside the Basic Multilingual Plane (for example U+2728, or U+2600
 *    followed by the emoji variation selector) are removed as well;
 *  - skin-tone modifiers, variation selectors, zero-width-joiner links and flag
 *    tag characters that belong to an emoji are removed together with it.
 *
 * Symbols that are only displayed as emoji when followed by U+FE0F (such as
 * the copyright sign) are kept when they appear without that selector.
 *
 * @param {string} str - Text to clean.
 * @returns {string} `str` without emoji sequences.
 */
export const removeEmojis = (str) => {
	// Start: a character with default emoji presentation, or a pictograph
	// explicitly switched to emoji presentation by U+FE0F.
	// Continuation: modifiers, selectors, ZWJ-joined pictographs, tag characters.
	const emojiRegex =
		/(?:\p{Emoji_Presentation}|\p{Extended_Pictographic}\uFE0F)(?:\p{Emoji_Modifier}|\uFE0F|\u200D\p{Extended_Pictographic}|[\u{E0020}-\u{E007F}])*/gu;

	return str.replace(emojiRegex, '');
};
/**
 * Split text into sentences and strip emoji from each of them.
 *
 * A boundary is whitespace that directly follows `.`, `!` or `?`. Requiring the
 * whitespace keeps decimals ("3.14"), host names ("example.com") and ellipses
 * ("...") inside their sentence instead of breaking them into fragments.
 *
 * @param {string} text - Text to split.
 * @returns {string[]} Non-empty sentences, emoji removed and surrounding
 *   whitespace trimmed.
 */
export const extractSentences = (text) => {
	// The lookbehind leaves the punctuation attached to the sentence it ends;
	// only the whitespace after it is consumed by the split.
	const sentences = text.split(/(?<=[.!?])\s+/);

	return sentences
		// Trim after removing emoji so no stray space is left where one stood.
		.map((sentence) => removeEmojis(sentence).trim())
		.filter((sentence) => sentence !== '');
};