forked from open-webui/open-webui
feat: tts optimisation
This commit is contained in:
parent
68ed24b7d7
commit
401799c6fa
2 changed files with 69 additions and 26 deletions
|
@ -15,6 +15,7 @@
|
|||
import CodeBlock from './CodeBlock.svelte';
|
||||
|
||||
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
|
||||
import { extractSentences } from '$lib/utils';
|
||||
|
||||
export let modelfiles = [];
|
||||
export let message;
|
||||
|
@ -35,8 +36,10 @@
|
|||
|
||||
let tooltipInstance = null;
|
||||
|
||||
let audioMap = {};
|
||||
let sentencesAudio = {};
|
||||
let speaking = null;
|
||||
let speakingIdx = null;
|
||||
|
||||
let loadingSpeech = false;
|
||||
|
||||
$: tokens = marked.lexer(message.content);
|
||||
|
@ -116,22 +119,53 @@
|
|||
}
|
||||
};
|
||||
|
||||
const playAudio = (idx) => {
	// Play the synthesized audio clip for sentence `idx` and resolve once
	// playback has finished, after a short inter-sentence pause.
	// Relies on component state: `sentencesAudio`, `speakingIdx`, `speaking`.
	return new Promise((res) => {
		speakingIdx = idx;
		const audio = sentencesAudio[idx];

		// Clips are created asynchronously; guard against a missing/null entry
		// so a failed synthesis cannot hang the playback chain forever.
		if (!audio) {
			speaking = null;
			res(null);
			return;
		}

		// Attach the handler BEFORE starting playback so a zero-length clip
		// cannot finish before the handler is installed.
		audio.onended = async (e) => {
			// Brief pause between sentences for more natural pacing.
			await new Promise((r) => setTimeout(r, 500));

			// Last sentence finished: mark the whole message as done speaking.
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
			}

			res(e);
		};

		// play() returns a promise that can reject (e.g. autoplay policy);
		// don't leave it floating.
		audio.play().catch((e) => console.error('Error playing audio:', e));
	});
};
|
||||
|
||||
const toggleSpeakMessage = async () => {
|
||||
if (speaking) {
|
||||
speechSynthesis.cancel();
|
||||
speaking = null;
|
||||
|
||||
audioMap[message.id].pause();
|
||||
audioMap[message.id].currentTime = 0;
|
||||
sentencesAudio[speakingIdx].pause();
|
||||
sentencesAudio[speakingIdx].currentTime = 0;
|
||||
|
||||
speaking = null;
|
||||
speakingIdx = null;
|
||||
} else {
|
||||
speaking = true;
|
||||
|
||||
if ($settings?.speech?.engine === 'openai') {
|
||||
loadingSpeech = true;
|
||||
|
||||
const sentences = extractSentences(message.content);
|
||||
console.log(sentences);
|
||||
|
||||
sentencesAudio = sentences.reduce((a, e, i, arr) => {
|
||||
a[i] = null;
|
||||
return a;
|
||||
}, {});
|
||||
|
||||
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
||||
|
||||
for (const [idx, sentence] of sentences.entries()) {
|
||||
const res = await synthesizeOpenAISpeech(
|
||||
localStorage.token,
|
||||
$settings?.speech?.speaker,
|
||||
message.content
|
||||
sentence
|
||||
).catch((error) => {
|
||||
toast.error(error);
|
||||
return null;
|
||||
|
@ -140,20 +174,12 @@
|
|||
if (res) {
|
||||
const blob = await res.blob();
|
||||
const blobUrl = URL.createObjectURL(blob);
|
||||
console.log(blobUrl);
|
||||
|
||||
const audio = new Audio(blobUrl);
|
||||
sentencesAudio[idx] = audio;
|
||||
loadingSpeech = false;
|
||||
|
||||
const audio = new Audio(blobUrl);
|
||||
audioMap[message.id] = audio;
|
||||
|
||||
audio.onended = () => {
|
||||
speaking = null;
|
||||
if ($settings.conversationMode) {
|
||||
document.getElementById('voice-input-button')?.click();
|
||||
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||
}
|
||||
};
|
||||
audio.play().catch((e) => console.error('Error playing audio:', e));
|
||||
}
|
||||
} else {
|
||||
let voices = [];
|
||||
|
|
|
@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
|
|||
|
||||
return url.protocol === 'http:' || url.protocol === 'https:';
|
||||
};
|
||||
|
||||
export const removeEmojis = (str) => {
	// Strip emoji characters from a string (used to clean text before TTS).
	//
	// The previous surrogate-pair regex ([\uD800-\uDBFF][\uDC00-\uDFFF] …)
	// removed EVERY astral-plane character — rare CJK ideographs, mathematical
	// alphanumerics, etc. — while missing BMP emojis such as "❤️".
	// \p{Extended_Pictographic} targets actual emoji code points; trailing
	// variation selectors (U+FE0F) and zero-width joiners (U+200D) that glue
	// emoji sequences together are consumed with the emoji so no invisible
	// residue is left behind.
	const emojiRegex = /\p{Extended_Pictographic}[\u200D\uFE0F]*/gu;

	// Replace emojis with an empty string
	return str.replace(emojiRegex, '');
};
|
||||
|
||||
export const extractSentences = (text) => {
	// Break a paragraph into individual sentences, splitting immediately
	// after each terminal punctuation mark (., !, ?) via a lookbehind so the
	// punctuation stays attached to its sentence.
	const pieces = text.split(/(?<=[.!?])/);

	// Trim and de-emoji each fragment, keeping only non-empty results.
	const sentences = [];
	for (const piece of pieces) {
		const cleaned = removeEmojis(piece.trim());
		if (cleaned !== '') {
			sentences.push(cleaned);
		}
	}

	return sentences;
};
|
||||
|
|
Loading…
Reference in a new issue