Merge pull request #704 from ollama-webui/tts

feat: tts optimisation
This commit is contained in:
Timothy Jaeryang Baek 2024-02-10 19:38:07 -08:00 committed by GitHub
commit 182ab8b8a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 86 additions and 28 deletions

View file

@ -15,6 +15,7 @@
import CodeBlock from './CodeBlock.svelte';
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
import { extractSentences } from '$lib/utils';
export let modelfiles = [];
export let message;
@ -35,8 +36,10 @@
let tooltipInstance = null;
let audioMap = {};
let sentencesAudio = {};
let speaking = null;
let speakingIdx = null;
let loadingSpeech = false;
$: tokens = marked.lexer(message.content);
@ -116,22 +119,68 @@
}
};
const playAudio = (idx) => {
	// Play the pre-synthesized audio clip for sentence `idx` and resolve once
	// playback has finished (plus a short pause so sentences don't run together).
	// Resolves (never rejects) so the caller's sequential promise chain
	// (`lastPlayedAudioPromise`) always advances.
	return new Promise((res) => {
		speakingIdx = idx;
		const audio = sentencesAudio[idx];

		// Attach the handler BEFORE starting playback so a very short clip
		// cannot finish before the listener is registered.
		audio.onended = async (e) => {
			// Brief gap between sentences.
			await new Promise((r) => setTimeout(r, 300));

			// Last sentence of the message: clear the speaking state.
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
			}

			res(e);
		};

		// If playback fails to start (autoplay policy, decode error, …),
		// log and resolve anyway so the playback chain does not stall forever.
		audio.play().catch((e) => {
			console.error('Error playing audio:', e);
			res(e);
		});
	});
};
const toggleSpeakMessage = async () => {
if (speaking) {
speechSynthesis.cancel();
speaking = null;
audioMap[message.id].pause();
audioMap[message.id].currentTime = 0;
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
speaking = null;
speakingIdx = null;
} else {
speaking = true;
if ($settings?.speech?.engine === 'openai') {
loadingSpeech = true;
const sentences = extractSentences(message.content).reduce((mergedTexts, currentText) => {
const lastIndex = mergedTexts.length - 1;
if (lastIndex >= 0) {
const previousText = mergedTexts[lastIndex];
const wordCount = previousText.split(/\s+/).length;
if (wordCount < 2) {
mergedTexts[lastIndex] = previousText + ' ' + currentText;
} else {
mergedTexts.push(currentText);
}
} else {
mergedTexts.push(currentText);
}
return mergedTexts;
}, []);
console.log(sentences);
sentencesAudio = sentences.reduce((a, e, i, arr) => {
a[i] = null;
return a;
}, {});
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
message.content
sentence
).catch((error) => {
toast.error(error);
return null;
@ -140,20 +189,12 @@
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
console.log(blobUrl);
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
const audio = new Audio(blobUrl);
audioMap[message.id] = audio;
audio.onended = () => {
speaking = null;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
};
audio.play().catch((e) => console.error('Error playing audio:', e));
}
} else {
let voices = [];

View file

@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:';
};
export const removeEmojis = (str) => {
	// Strip surrogate-pair (astral-plane) characters such as emoji from `str`.
	// The single alternative [\uD800-\uDBFF][\uDC00-\uDFFF] matches EVERY
	// surrogate pair; the original regex's extra alternatives
	// (\uD83C[...] and \uD83D[...]) were strict subsets of it — dead code —
	// so dropping them is behavior-identical.
	// NOTE(review): this also removes non-emoji astral characters (e.g. rare
	// CJK ideographs) and keeps BMP emoji like \u2600 — same as before.
	const surrogatePairRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
	return str.replace(surrogatePairRegex, '');
};
export const extractSentences = (text) => {
	// Break `text` into sentences at terminal punctuation (., !, ?).
	// The lookbehind split keeps the punctuation attached to its sentence.
	const pieces = text.split(/(?<=[.!?])/);

	// Trim each piece, strip emojis, and drop anything left empty.
	const cleaned = [];
	for (const piece of pieces) {
		const sentence = removeEmojis(piece.trim());
		if (sentence !== '') {
			cleaned.push(sentence);
		}
	}
	return cleaned;
};