forked from open-webui/open-webui
feat: tts optimisation
This commit is contained in:
parent
68ed24b7d7
commit
401799c6fa
2 changed files with 69 additions and 26 deletions
|
@ -15,6 +15,7 @@
|
||||||
import CodeBlock from './CodeBlock.svelte';
|
import CodeBlock from './CodeBlock.svelte';
|
||||||
|
|
||||||
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
|
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
|
||||||
|
import { extractSentences } from '$lib/utils';
|
||||||
|
|
||||||
export let modelfiles = [];
|
export let modelfiles = [];
|
||||||
export let message;
|
export let message;
|
||||||
|
@ -35,8 +36,10 @@
|
||||||
|
|
||||||
let tooltipInstance = null;
|
let tooltipInstance = null;
|
||||||
|
|
||||||
let audioMap = {};
|
let sentencesAudio = {};
|
||||||
let speaking = null;
|
let speaking = null;
|
||||||
|
let speakingIdx = null;
|
||||||
|
|
||||||
let loadingSpeech = false;
|
let loadingSpeech = false;
|
||||||
|
|
||||||
$: tokens = marked.lexer(message.content);
|
$: tokens = marked.lexer(message.content);
|
||||||
|
@ -116,22 +119,53 @@
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Plays the synthesized audio stored at `sentencesAudio[idx]`.
 *
 * The returned promise always settles, so a chain of sentences never stalls:
 *  - normal playback: resolves with the `ended` event after a 500 ms pause;
 *  - no audio stored for `idx`, or playback fails to start: resolves with `null`;
 *  - `speaking` already cleared (playback was stopped): resolves with `null`
 *    without starting anything.
 *
 * `speaking` is reset to `null` once the entry with the last index is done.
 *
 * @param {number} idx - Index of the sentence in `sentencesAudio`.
 * @returns {Promise<Event|null>} The `ended` event, or `null` if nothing was played.
 */
const playAudio = (idx) => {
	return new Promise((res) => {
		// Settle the promise; clear the speaking flag when this was the last sentence.
		const finish = (result) => {
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
			}
			res(result);
		};

		// Playback was stopped while this sentence was still queued: do not start it.
		if (!speaking) {
			res(null);
			return;
		}

		const audio = sentencesAudio[idx];
		if (!audio) {
			// Nothing was stored for this sentence; skip it instead of throwing.
			finish(null);
			return;
		}

		speakingIdx = idx;

		audio.onended = async (e) => {
			// Short pause between consecutive sentences.
			await new Promise((r) => setTimeout(r, 500));
			finish(e);
		};

		// play() returns a promise that rejects when playback cannot start
		// (e.g. autoplay policy); without a handler this promise would never settle.
		Promise.resolve(audio.play()).catch((error) => {
			console.error('Error playing audio:', error);
			audio.onended = null;
			finish(null);
		});
	});
};
|
||||||
|
|
||||||
const toggleSpeakMessage = async () => {
|
const toggleSpeakMessage = async () => {
|
||||||
if (speaking) {
|
if (speaking) {
|
||||||
speechSynthesis.cancel();
|
speechSynthesis.cancel();
|
||||||
speaking = null;
|
|
||||||
|
|
||||||
audioMap[message.id].pause();
|
sentencesAudio[speakingIdx].pause();
|
||||||
audioMap[message.id].currentTime = 0;
|
sentencesAudio[speakingIdx].currentTime = 0;
|
||||||
|
|
||||||
|
speaking = null;
|
||||||
|
speakingIdx = null;
|
||||||
} else {
|
} else {
|
||||||
speaking = true;
|
speaking = true;
|
||||||
|
|
||||||
if ($settings?.speech?.engine === 'openai') {
|
if ($settings?.speech?.engine === 'openai') {
|
||||||
loadingSpeech = true;
|
loadingSpeech = true;
|
||||||
|
|
||||||
|
const sentences = extractSentences(message.content);
|
||||||
|
console.log(sentences);
|
||||||
|
|
||||||
|
sentencesAudio = sentences.reduce((a, e, i, arr) => {
|
||||||
|
a[i] = null;
|
||||||
|
return a;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
|
||||||
|
|
||||||
|
for (const [idx, sentence] of sentences.entries()) {
|
||||||
const res = await synthesizeOpenAISpeech(
|
const res = await synthesizeOpenAISpeech(
|
||||||
localStorage.token,
|
localStorage.token,
|
||||||
$settings?.speech?.speaker,
|
$settings?.speech?.speaker,
|
||||||
message.content
|
sentence
|
||||||
).catch((error) => {
|
).catch((error) => {
|
||||||
toast.error(error);
|
toast.error(error);
|
||||||
return null;
|
return null;
|
||||||
|
@ -140,20 +174,12 @@
|
||||||
if (res) {
|
if (res) {
|
||||||
const blob = await res.blob();
|
const blob = await res.blob();
|
||||||
const blobUrl = URL.createObjectURL(blob);
|
const blobUrl = URL.createObjectURL(blob);
|
||||||
console.log(blobUrl);
|
const audio = new Audio(blobUrl);
|
||||||
|
sentencesAudio[idx] = audio;
|
||||||
loadingSpeech = false;
|
loadingSpeech = false;
|
||||||
|
|
||||||
const audio = new Audio(blobUrl);
|
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
|
||||||
audioMap[message.id] = audio;
|
|
||||||
|
|
||||||
audio.onended = () => {
|
|
||||||
speaking = null;
|
|
||||||
if ($settings.conversationMode) {
|
|
||||||
document.getElementById('voice-input-button')?.click();
|
|
||||||
}
|
}
|
||||||
};
|
|
||||||
audio.play().catch((e) => console.error('Error playing audio:', e));
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let voices = [];
|
let voices = [];
|
||||||
|
|
|
@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
|
||||||
|
|
||||||
return url.protocol === 'http:' || url.protocol === 'https:';
|
return url.protocol === 'http:' || url.protocol === 'https:';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Removes emoji from a string.
 *
 * Matches on Unicode properties rather than raw surrogate pairs, so that:
 *  - emoji in the Basic Multilingual Plane (e.g. U+2600, U+2705) are removed;
 *  - non-emoji astral characters (e.g. CJK Extension B, mathematical letters)
 *    are preserved;
 *  - the glue that follows an emoji (variation selector-16, zero-width joiner,
 *    tag characters) is removed together with it.
 *
 * Note: `Extended_Pictographic` also covers a few text symbols such as
 * U+00A9 (copyright sign) and U+2122 (trade mark sign), so those are removed as well.
 *
 * @param {string} str - Input text.
 * @returns {string} The text with emoji removed; surrounding whitespace is left as is.
 */
export const removeEmojis = (str) => {
	// Pictographs, flag letters and skin-tone modifiers, each followed by any
	// number of VS16 / ZWJ / tag characters belonging to the same sequence.
	const emojiRegex =
		/(?:\p{Extended_Pictographic}|\p{Regional_Indicator}|\p{Emoji_Modifier})[\uFE0F\u200D\u{E0020}-\u{E007F}]*/gu;

	return str.replace(emojiRegex, '');
};
|
||||||
|
|
||||||
|
/**
 * Splits text into sentences, with emoji removed and whitespace trimmed.
 *
 * A sentence boundary is a `.`, `!` or `?` that is followed by whitespace.
 * Punctuation inside a token ("3.14") or inside a run ("...", "?!") does not
 * split. Fragments that contain no letter or digit after cleaning (for example
 * a lone "..." or an emoji-only sentence) are dropped.
 *
 * @param {string} text - Input text; `null`/`undefined` is treated as empty.
 * @returns {string[]} The cleaned sentences in their original order.
 */
export const extractSentences = (text) => {
	// Zero-width split: keeps the punctuation attached to the sentence it ends.
	const sentences = (text ?? '').split(/(?<=[.!?])(?=\s)/);

	return (
		sentences
			// Trim after removing emoji so no whitespace is left where an emoji stood at an edge.
			.map((sentence) => removeEmojis(sentence).trim())
			.filter((sentence) => /[\p{L}\p{N}]/u.test(sentence))
	);
};
|
||||||
|
|
Loading…
Reference in a new issue