forked from open-webui/open-webui
Merge pull request #1630 from cheahjs/feat/split-large-chunks
feat: split large openai responses into smaller chunks
This commit is contained in:
commit
2d7d6cfffc
5 changed files with 121 additions and 52 deletions
70
src/lib/apis/streaming/index.ts
Normal file
70
src/lib/apis/streaming/index.ts
Normal file
|
@ -0,0 +1,70 @@
|
||||||
|
/** A single update emitted by the text stream generators in this module. */
type TextStreamUpdate = {
	/** True once the stream has finished; no further text follows. */
	done: boolean;
	/** Text delta carried by this update (empty string when there is none). */
	value: string;
};
|
||||||
|
|
||||||
|
/**
 * Wraps the reader of an OpenAI-style SSE response in an async generator of
 * text delta updates.
 *
 * The function is declared `async` (and therefore returns a promise of the
 * generator) even though nothing is awaited inside it; existing callers
 * `await` the result, so that shape is kept.
 *
 * @param messageStream Reader whose chunks are handed to `openAIStreamToIterator`.
 * @param splitLargeDeltas When true, the updates are additionally passed through
 *   `streamLargeDeltasAsRandomChunks`, which re-emits large deltas as several
 *   small, randomly sized chunks.
 * @returns An async generator yielding `TextStreamUpdate` values.
 */
export async function createOpenAITextStream(
	messageStream: ReadableStreamDefaultReader,
	splitLargeDeltas: boolean
): Promise<AsyncGenerator<TextStreamUpdate>> {
	const deltas = openAIStreamToIterator(messageStream);
	return splitLargeDeltas ? streamLargeDeltasAsRandomChunks(deltas) : deltas;
}
|
||||||
|
|
||||||
|
/**
 * Converts the chunks read from an OpenAI-style SSE stream into text updates.
 *
 * Each chunk is split on newlines and every non-empty line is handled as
 * follows:
 * - lines starting with `:` are SSE comments (e.g. keep-alives) and are skipped;
 * - an optional `data:` prefix is stripped;
 * - the payload `[DONE]` ends the stream: one `done` update is yielded and the
 *   generator returns without reading further;
 * - any other payload is parsed as JSON and `choices[0].delta.content` is
 *   yielded (an empty string when that path is missing or not a string).
 *
 * A line that is not valid JSON is logged and skipped instead of aborting the
 * whole stream, so a single malformed line cannot cut a response short.
 *
 * @param reader Reader that is expected to produce string chunks; non-string
 *   chunks are ignored.
 * @returns Async generator of updates, always terminated by exactly one
 *   `{ done: true, value: '' }`.
 */
async function* openAIStreamToIterator(
	reader: ReadableStreamDefaultReader
): AsyncGenerator<TextStreamUpdate> {
	const dataPrefix = 'data:';

	while (true) {
		const { value, done } = await reader.read();
		if (done) {
			yield { done: true, value: '' };
			return;
		}

		const lines: string[] = typeof value === 'string' ? value.split('\n') : [];
		for (const rawLine of lines) {
			// trim() also drops a trailing '\r' left behind by CRLF line endings.
			const line = rawLine.trim();
			if (line === '' || line.startsWith(':')) {
				continue;
			}

			const payload = line.startsWith(dataPrefix) ? line.slice(dataPrefix.length).trim() : line;
			if (payload === '') {
				continue;
			}
			if (payload === '[DONE]') {
				yield { done: true, value: '' };
				return;
			}

			let content: unknown;
			try {
				content = JSON.parse(payload)?.choices?.[0]?.delta?.content;
			} catch (error) {
				console.log(error);
				continue;
			}

			yield { done: false, value: typeof content === 'string' ? content : '' };
		}
	}
}
|
||||||
|
|
||||||
|
/**
 * Re-emits large text deltas as a series of small, randomly sized chunks so the
 * output appears to stream smoothly even when a provider sends a lot of text at
 * once.
 *
 * - A `done` update is forwarded unchanged and ends the generator.
 * - Deltas shorter than 5 UTF-16 code units are forwarded as they are.
 * - Longer deltas are cut into chunks of 1-3 characters, with a 5 ms pause
 *   after each chunk.
 *
 * Chunks are cut on Unicode code point boundaries, so a surrogate pair (for
 * example an emoji) is never split across two updates.
 *
 * @param iterator Source of text updates.
 * @returns Async generator yielding the same text, possibly in smaller pieces.
 */
async function* streamLargeDeltasAsRandomChunks(
	iterator: AsyncGenerator<TextStreamUpdate>
): AsyncGenerator<TextStreamUpdate> {
	const minSplitLength = 5;
	const maxChunkSize = 3;
	const chunkDelayMs = 5;

	for await (const textStreamUpdate of iterator) {
		if (textStreamUpdate.done) {
			yield textStreamUpdate;
			return;
		}

		const content = textStreamUpdate.value;
		if (content.length < minSplitLength) {
			yield { done: false, value: content };
			continue;
		}

		// Array.from iterates by code point, keeping surrogate pairs together.
		const codePoints = Array.from(content);
		let offset = 0;
		while (offset < codePoints.length) {
			const chunkSize = Math.floor(Math.random() * maxChunkSize) + 1;
			const chunk = codePoints.slice(offset, offset + chunkSize).join('');
			offset += chunkSize;
			yield { done: false, value: chunk };
			await sleep(chunkDelayMs);
		}
	}
}
|
||||||
|
|
||||||
|
/** Returns a promise that settles after roughly `ms` milliseconds. */
const sleep = (ms: number) =>
	new Promise((wake) => {
		setTimeout(wake, ms);
	});
|
|
@ -17,11 +17,17 @@
|
||||||
let titleAutoGenerateModelExternal = '';
|
let titleAutoGenerateModelExternal = '';
|
||||||
let fullScreenMode = false;
|
let fullScreenMode = false;
|
||||||
let titleGenerationPrompt = '';
|
let titleGenerationPrompt = '';
|
||||||
|
let splitLargeChunks = false;
|
||||||
|
|
||||||
// Interface
|
// Interface
|
||||||
let promptSuggestions = [];
|
let promptSuggestions = [];
|
||||||
let showUsername = false;
|
let showUsername = false;
|
||||||
|
|
||||||
|
// Flips the "split large chunks" flag and hands the new value to saveSettings.
const toggleSplitLargeChunks = async () => {
	splitLargeChunks = !splitLargeChunks;
	saveSettings({ splitLargeChunks: splitLargeChunks });
};
|
||||||
|
|
||||||
const toggleFullScreenMode = async () => {
|
const toggleFullScreenMode = async () => {
|
||||||
fullScreenMode = !fullScreenMode;
|
fullScreenMode = !fullScreenMode;
|
||||||
saveSettings({ fullScreenMode: fullScreenMode });
|
saveSettings({ fullScreenMode: fullScreenMode });
|
||||||
|
@ -197,6 +203,28 @@
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
|
<div class=" self-center text-xs font-medium">
|
||||||
|
{$i18n.t('Fluidly stream large external response chunks')}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button
|
||||||
|
class="p-1 px-3 text-xs flex rounded transition"
|
||||||
|
on:click={() => {
|
||||||
|
toggleSplitLargeChunks();
|
||||||
|
}}
|
||||||
|
type="button"
|
||||||
|
>
|
||||||
|
{#if splitLargeChunks === true}
|
||||||
|
<span class="ml-2 self-center">{$i18n.t('On')}</span>
|
||||||
|
{:else}
|
||||||
|
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
|
||||||
|
{/if}
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<hr class=" dark:border-gray-700" />
|
<hr class=" dark:border-gray-700" />
|
||||||
|
|
|
@ -152,6 +152,7 @@
|
||||||
"File Mode": "",
|
"File Mode": "",
|
||||||
"File not found.": "",
|
"File not found.": "",
|
||||||
"Fingerprint spoofing detected: Unable to use initials as avatar. Defaulting to default profile image.": "",
|
"Fingerprint spoofing detected: Unable to use initials as avatar. Defaulting to default profile image.": "",
|
||||||
|
"Fluidly stream large external response chunks": "",
|
||||||
"Focus chat input": "",
|
"Focus chat input": "",
|
||||||
"Format your variables using square brackets like this:": "",
|
"Format your variables using square brackets like this:": "",
|
||||||
"From (Base Model)": "",
|
"From (Base Model)": "",
|
||||||
|
|
|
@ -39,6 +39,7 @@
|
||||||
import { RAGTemplate } from '$lib/utils/rag';
|
import { RAGTemplate } from '$lib/utils/rag';
|
||||||
import { LITELLM_API_BASE_URL, OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL } from '$lib/constants';
|
import { LITELLM_API_BASE_URL, OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL } from '$lib/constants';
|
||||||
import { WEBUI_BASE_URL } from '$lib/constants';
|
import { WEBUI_BASE_URL } from '$lib/constants';
|
||||||
|
import { createOpenAITextStream } from '$lib/apis/streaming';
|
||||||
|
|
||||||
const i18n = getContext('i18n');
|
const i18n = getContext('i18n');
|
||||||
|
|
||||||
|
@ -599,38 +600,22 @@
|
||||||
.pipeThrough(splitStream('\n'))
|
.pipeThrough(splitStream('\n'))
|
||||||
.getReader();
|
.getReader();
|
||||||
|
|
||||||
while (true) {
|
const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
|
||||||
const { value, done } = await reader.read();
|
console.log(textStream);
|
||||||
|
|
||||||
|
for await (const update of textStream) {
|
||||||
|
const { value, done } = update;
|
||||||
if (done || stopResponseFlag || _chatId !== $chatId) {
|
if (done || stopResponseFlag || _chatId !== $chatId) {
|
||||||
responseMessage.done = true;
|
responseMessage.done = true;
|
||||||
messages = messages;
|
messages = messages;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
if (responseMessage.content == '' && value == '\n') {
|
||||||
let lines = value.split('\n');
|
continue;
|
||||||
|
} else {
|
||||||
for (const line of lines) {
|
responseMessage.content += value;
|
||||||
if (line !== '') {
|
messages = messages;
|
||||||
console.log(line);
|
|
||||||
if (line === 'data: [DONE]') {
|
|
||||||
responseMessage.done = true;
|
|
||||||
messages = messages;
|
|
||||||
} else {
|
|
||||||
let data = JSON.parse(line.replace(/^data: /, ''));
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
responseMessage.content += data.choices[0].delta.content ?? '';
|
|
||||||
messages = messages;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.log(error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($settings.notificationEnabled && !document.hasFocus()) {
|
if ($settings.notificationEnabled && !document.hasFocus()) {
|
||||||
|
|
|
@ -42,6 +42,7 @@
|
||||||
OLLAMA_API_BASE_URL,
|
OLLAMA_API_BASE_URL,
|
||||||
WEBUI_BASE_URL
|
WEBUI_BASE_URL
|
||||||
} from '$lib/constants';
|
} from '$lib/constants';
|
||||||
|
import { createOpenAITextStream } from '$lib/apis/streaming';
|
||||||
|
|
||||||
const i18n = getContext('i18n');
|
const i18n = getContext('i18n');
|
||||||
|
|
||||||
|
@ -611,38 +612,22 @@
|
||||||
.pipeThrough(splitStream('\n'))
|
.pipeThrough(splitStream('\n'))
|
||||||
.getReader();
|
.getReader();
|
||||||
|
|
||||||
while (true) {
|
const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
|
||||||
const { value, done } = await reader.read();
|
console.log(textStream);
|
||||||
|
|
||||||
|
for await (const update of textStream) {
|
||||||
|
const { value, done } = update;
|
||||||
if (done || stopResponseFlag || _chatId !== $chatId) {
|
if (done || stopResponseFlag || _chatId !== $chatId) {
|
||||||
responseMessage.done = true;
|
responseMessage.done = true;
|
||||||
messages = messages;
|
messages = messages;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
if (responseMessage.content == '' && value == '\n') {
|
||||||
let lines = value.split('\n');
|
continue;
|
||||||
|
} else {
|
||||||
for (const line of lines) {
|
responseMessage.content += value;
|
||||||
if (line !== '') {
|
messages = messages;
|
||||||
console.log(line);
|
|
||||||
if (line === 'data: [DONE]') {
|
|
||||||
responseMessage.done = true;
|
|
||||||
messages = messages;
|
|
||||||
} else {
|
|
||||||
let data = JSON.parse(line.replace(/^data: /, ''));
|
|
||||||
console.log(data);
|
|
||||||
|
|
||||||
if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
responseMessage.content += data.choices[0].delta.content ?? '';
|
|
||||||
messages = messages;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (error) {
|
|
||||||
console.log(error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($settings.notificationEnabled && !document.hasFocus()) {
|
if ($settings.notificationEnabled && !document.hasFocus()) {
|
||||||
|
|
Loading…
Reference in a new issue