feat: tts optimisation

This commit is contained in:
Timothy J. Baek 2024-02-10 19:20:56 -08:00
parent 68ed24b7d7
commit 401799c6fa
2 changed files with 69 additions and 26 deletions

View file

@ -15,6 +15,7 @@
import CodeBlock from './CodeBlock.svelte';
import { synthesizeOpenAISpeech } from '$lib/apis/openai';
import { extractSentences } from '$lib/utils';
export let modelfiles = [];
export let message;
@ -35,8 +36,10 @@
// Tooltip handle; starts as null (it is assigned outside this excerpt — NOTE(review): confirm where).
let tooltipInstance = null;
// Audio objects for whole-message playback, keyed by message id.
let audioMap = {};
// Per-sentence Audio objects keyed by sentence index; a slot stays null until that
// sentence has been synthesized.
let sentencesAudio = {};
// Truthy while this message is being read aloud, null when idle.
let speaking = null;
// Key into `sentencesAudio` of the sentence that is currently playing, or null.
let speakingIdx = null;
// True while a speech synthesis request is in flight and nothing can be played yet.
let loadingSpeech = false;
// Reactive statement: re-lex the message content into marked tokens whenever it changes.
$: tokens = marked.lexer(message.content);
@ -116,44 +119,67 @@
}
};
/**
 * Play the audio clip stored for sentence `idx` and resolve once it has finished.
 *
 * Side effects: records `idx` in `speakingIdx`, and clears `speaking` after the
 * clip in the last slot of `sentencesAudio` has ended.
 *
 * @param {number} idx - Key of the clip inside `sentencesAudio`.
 * @returns {Promise<Event>} Resolves with the `ended` event, 500 ms after playback
 *   ends. The promise stays pending if playback never reaches its end (for example
 *   when it could not start, or when the clip is paused).
 */
const playAudio = (idx) => {
	return new Promise((res) => {
		speakingIdx = idx;
		const audio = sentencesAudio[idx];

		// Attach the handler before starting playback so the `ended` event cannot be missed.
		audio.onended = async (e) => {
			// Leave a short gap before the next sentence starts.
			await new Promise((r) => setTimeout(r, 500));

			// The last slot has finished: the whole message has been spoken.
			if (Object.keys(sentencesAudio).length - 1 === idx) {
				speaking = null;
			}

			res(e);
		};

		// play() returns a promise that rejects when playback cannot start (autoplay
		// policy, unsupported source, interrupted by pause). Log it instead of leaving
		// an unhandled rejection; `?.` tolerates legacy engines that return undefined.
		audio.play()?.catch((e) => console.error('Error playing audio:', e));
	});
};
const toggleSpeakMessage = async () => {
if (speaking) {
speechSynthesis.cancel();
speaking = null;
audioMap[message.id].pause();
audioMap[message.id].currentTime = 0;
sentencesAudio[speakingIdx].pause();
sentencesAudio[speakingIdx].currentTime = 0;
speaking = null;
speakingIdx = null;
} else {
speaking = true;
if ($settings?.speech?.engine === 'openai') {
loadingSpeech = true;
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
message.content
).catch((error) => {
toast.error(error);
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
console.log(blobUrl);
const sentences = extractSentences(message.content);
console.log(sentences);
loadingSpeech = false;
sentencesAudio = sentences.reduce((a, e, i, arr) => {
a[i] = null;
return a;
}, {});
const audio = new Audio(blobUrl);
audioMap[message.id] = audio;
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
audio.onended = () => {
speaking = null;
if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click();
}
};
audio.play().catch((e) => console.error('Error playing audio:', e));
for (const [idx, sentence] of sentences.entries()) {
const res = await synthesizeOpenAISpeech(
localStorage.token,
$settings?.speech?.speaker,
sentence
).catch((error) => {
toast.error(error);
return null;
});
if (res) {
const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
sentencesAudio[idx] = audio;
loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
}
}
} else {
let voices = [];

View file

@ -324,3 +324,20 @@ export const isValidHttpUrl = (string) => {
return url.protocol === 'http:' || url.protocol === 'https:';
};
/**
 * Remove emoji from a string, leaving all other text untouched.
 *
 * Handles single emoji, text symbols forced to emoji presentation with U+FE0F,
 * keycaps, flags, skin-tone modifiers and zero-width-joiner sequences.
 * Non-emoji characters outside the Basic Multilingual Plane (e.g. rare CJK
 * ideographs, mathematical letters) are preserved.
 *
 * @param {string} str - Text that may contain emoji.
 * @returns {string} The text with emoji removed. Surrounding whitespace is not trimmed.
 */
export const removeEmojis = (str) => {
	// An emoji is either a code point that renders as emoji by default, or any
	// emoji-capable code point followed by the emoji variation selector U+FE0F.
	// It may be extended by a keycap mark (U+20E3), flag tag characters, or
	// further pictographs glued on with a zero-width joiner (U+200D).
	const emojiRegex =
		/(?:\p{Emoji_Presentation}|\p{Emoji}\uFE0F)(?:\u20E3|[\u{E0020}-\u{E007F}]|\u200D\p{Extended_Pictographic}\uFE0F?)*/gu;

	// Replace emojis with an empty string
	return str.replace(emojiRegex, '');
};
/**
 * Split a block of text into sentences suitable for sentence-by-sentence speech synthesis.
 *
 * @param {string} text - The text to split; null/undefined is treated as empty.
 * @returns {string[]} Sentences with emojis removed and surrounding whitespace
 *   trimmed. Pieces that end up empty are dropped.
 */
export const extractSentences = (text) => {
	// Break after sentence-ending punctuation only where whitespace follows, so
	// "3.14", "example.com", "..." and "?!" are not chopped into fragments.
	const sentences = (text ?? '').split(/(?<=[.!?])\s+/);

	return (
		sentences
			// Trim after stripping emojis so whitespace next to a removed emoji
			// neither lingers nor yields a whitespace-only sentence.
			.map((sentence) => removeEmojis(sentence).trim())
			.filter((sentence) => sentence !== '')
	);
};