feat: terminate request on user stop

Timothy J. Baek 2024-01-17 19:19:44 -08:00
parent 684bdf5151
commit 442e3d978a
4 changed files with 170 additions and 86 deletions
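In short: the backend proxy now tags every Ollama request with a UUID, keeps it in the in-memory REQUEST_POOL, and streams that id back to the client as an extra first NDJSON line; the client remembers the id and, when the user presses stop, aborts its fetch and calls the new GET /cancel/{request_id} route so the server-side generator stops forwarding chunks. A minimal sketch of the announcement line's shape, assuming TypeScript on the client; the field names come from the json.dumps({"id": request_id, "done": False}) call below, while the interface itself is illustrative and not part of the commit:

// Illustrative only: shape of the extra first line streamed for /chat requests.
interface RequestIdAnnouncement {
	id: string; // per-request UUID the backend keeps in REQUEST_POOL
	done: false; // reuses Ollama's "done" flag so existing stream parsers can branch on it
}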

View file

@@ -5,6 +5,7 @@ from fastapi.concurrency import run_in_threadpool
import requests
import json
import uuid
from pydantic import BaseModel
from apps.web.models.users import Users
@@ -26,6 +27,9 @@ app.state.OLLAMA_API_BASE_URL = OLLAMA_API_BASE_URL
# TARGET_SERVER_URL = OLLAMA_API_BASE_URL

REQUEST_POOL = []


@app.get("/url")
async def get_ollama_api_url(user=Depends(get_current_user)):
    if user and user.role == "admin":
@@ -49,6 +53,16 @@ async def update_ollama_api_url(
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)


@app.get("/cancel/{request_id}")
async def cancel_ollama_request(request_id: str, user=Depends(get_current_user)):
    if user:
        if request_id in REQUEST_POOL:
            REQUEST_POOL.remove(request_id)
        return True
    else:
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_current_user)):
    target_url = f"{app.state.OLLAMA_API_BASE_URL}/{path}"
@@ -74,7 +88,27 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
    def get_request():
        nonlocal r

        request_id = str(uuid.uuid4())
        try:
            REQUEST_POOL.append(request_id)

            def stream_content():
                try:
                    if path in ["chat"]:
                        yield json.dumps({"id": request_id, "done": False}) + "\n"

                    for chunk in r.iter_content(chunk_size=8192):
                        if request_id in REQUEST_POOL:
                            yield chunk
                        else:
                            print("User: canceled request")
                            break
                finally:
                    if hasattr(r, "close"):
                        r.close()
                        REQUEST_POOL.remove(request_id)

            r = requests.request(
                method=request.method,
                url=target_url,
@@ -85,8 +119,10 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
            r.raise_for_status()

            # r.close()

            return StreamingResponse(
-                r.iter_content(chunk_size=8192),
+                stream_content(),
                status_code=r.status_code,
                headers=dict(r.headers),
            )
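For context, a minimal client-side sketch of how the announced request id can be picked out of the proxied stream; it mirrors the "if ('id' in data)" branch added to the chat pages further down, but the helper name and wiring here are illustrative, not part of the commit:

// Sketch: read the NDJSON stream and separate the id announcement from normal chunks.
// `res` is assumed to be the Response returned by generateChatCompletion below.
const readStream = async (res: Response, onChunk: (data: unknown) => void) => {
	let requestId: string | null = null;
	const reader = res.body!.pipeThrough(new TextDecoderStream()).getReader();
	let buffer = '';

	while (true) {
		const { value, done } = await reader.read();
		if (done) break;
		buffer += value;

		const lines = buffer.split('\n');
		buffer = lines.pop() ?? ''; // keep any partial trailing line for the next read

		for (const line of lines.filter((l) => l !== '')) {
			const data = JSON.parse(line);
			if ('id' in data) {
				requestId = data.id; // the announcement line added in this commit
			} else {
				onChunk(data); // an ordinary Ollama chat chunk
			}
		}
	}

	return requestId;
};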

View file

@@ -206,9 +206,11 @@ export const generatePrompt = async (token: string = '', model: string, conversa
};
export const generateChatCompletion = async (token: string = '', body: object) => {
	let controller = new AbortController();
	let error = null;

	const res = await fetch(`${OLLAMA_API_BASE_URL}/chat`, {
		signal: controller.signal,
		method: 'POST',
		headers: {
			'Content-Type': 'text/event-stream',
@@ -224,6 +226,27 @@ export const generateChatCompletion = async (token: string = '', body: object) => {
		throw error;
	}

	return [res, controller];
};

export const cancelChatCompletion = async (token: string = '', requestId: string) => {
	let error = null;

	const res = await fetch(`${OLLAMA_API_BASE_URL}/cancel/${requestId}`, {
		method: 'GET',
		headers: {
			'Content-Type': 'text/event-stream',
			Authorization: `Bearer ${token}`
		}
	}).catch((err) => {
		error = err;
		return null;
	});

	if (error) {
		throw error;
	}

	return res;
};
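Taken together, the two helpers support a stop flow roughly like the one wired into the chat pages below. A sketch, assuming the request id was captured from the stream's announcement line; everything except the two imported helpers is a placeholder:

import { generateChatCompletion, cancelChatCompletion } from '$lib/apis/ollama';

export const runWithStop = async (token: string, body: object) => {
	let currentRequestId: string | null = null;

	// res streams the completion; controller lets the client abort the fetch locally.
	const [res, controller] = await generateChatCompletion(token, body);

	const stopResponse = async () => {
		controller.abort('User: Stop Response'); // stop reading on the client
		if (currentRequestId) {
			// ask the backend to drop the id from REQUEST_POOL so it stops proxying too
			await cancelChatCompletion(token, currentRequestId);
			currentRequestId = null;
		}
	};

	return { res, stopResponse, setRequestId: (id: string) => (currentRequestId = id) };
};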

View file

@@ -9,7 +9,7 @@
	import { models, modelfiles, user, settings, chats, chatId, config } from '$lib/stores';
	import { copyToClipboard, splitStream } from '$lib/utils';
-	import { generateChatCompletion, generateTitle } from '$lib/apis/ollama';
+	import { generateChatCompletion, cancelChatCompletion, generateTitle } from '$lib/apis/ollama';
	import { createNewChat, getChatList, updateChatById } from '$lib/apis/chats';
	import { queryVectorDB } from '$lib/apis/rag';
	import { generateOpenAIChatCompletion } from '$lib/apis/openai';
@@ -24,6 +24,8 @@
	let autoScroll = true;
	let processing = '';

	let currentRequestId = null;

	let selectedModels = [''];
	let selectedModelfile = null;
@@ -279,7 +281,7 @@
		// Scroll down
		window.scrollTo({ top: document.body.scrollHeight });

-		const res = await generateChatCompletion(localStorage.token, {
+		const [res, controller] = await generateChatCompletion(localStorage.token, {
			model: model,
			messages: [
				$settings.system
@@ -307,6 +309,8 @@
		});

		if (res && res.ok) {
			console.log('controller', controller);

			const reader = res.body
				.pipeThrough(new TextDecoderStream())
				.pipeThrough(splitStream('\n'))
@@ -317,6 +321,14 @@
				if (done || stopResponseFlag || _chatId !== $chatId) {
					responseMessage.done = true;
					messages = messages;

					if (stopResponseFlag) {
						controller.abort('User: Stop Response');
						await cancelChatCompletion(localStorage.token, currentRequestId);
					}

					currentRequestId = null;
					break;
				}
@@ -332,6 +344,10 @@
								throw data;
							}

							if ('id' in data) {
								console.log(data);
								currentRequestId = data.id;
							} else {
								if (data.done == false) {
									if (responseMessage.content == '' && data.message.content == '\n') {
										continue;
@@ -382,6 +398,7 @@
								}
							}
						}
					}
				} catch (error) {
					console.log(error);
					if ('detail' in error) {

View file

@@ -297,7 +297,7 @@
		// Scroll down
		window.scrollTo({ top: document.body.scrollHeight });

-		const res = await generateChatCompletion(localStorage.token, {
+		const [res, controller] = await generateChatCompletion(localStorage.token, {
			model: model,
			messages: [
				$settings.system
@@ -335,6 +335,10 @@
				if (done || stopResponseFlag || _chatId !== $chatId) {
					responseMessage.done = true;
					messages = messages;

					if (stopResponseFlag) {
						controller.abort('User: Stop Response');
					}

					break;
				}
@@ -350,6 +354,9 @@
								throw data;
							}

							if ('id' in data) {
								console.log(data);
							} else {
								if (data.done == false) {
									if (responseMessage.content == '' && data.message.content == '\n') {
										continue;
@@ -400,6 +407,7 @@
								}
							}
						}
					}
				} catch (error) {
					console.log(error);
					if ('detail' in error) {