From 14c38d31a7d4e635617366b83295a0070f84d61f Mon Sep 17 00:00:00 2001
From: officialsahyaboutorabi <152934862+officialsahyaboutorabi@users.noreply.github.com>
Date: Thu, 7 Mar 2024 15:06:12 +1100
Subject: [PATCH 01/41] Update Models.svelte

---
 src/lib/components/chat/Settings/Models.svelte | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte
index 391dc008..f669a1fe 100644
--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -589,7 +589,7 @@
 							on:change={() => {
 								console.log(modelInputFile);
 							}}
-							accept=".gguf"
+							accept=".gguf",".safetensors"
 							required
 							hidden
 						/>

From a901031896aa7085d03064ba6ee046d695f03fcb Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Thu, 7 Mar 2024 02:17:57 -0800
Subject: [PATCH 02/41] fix: accept attribute

---
 src/lib/components/chat/Settings/Models.svelte | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte
index f669a1fe..bdd2cc77 100644
--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -589,7 +589,7 @@
 							on:change={() => {
 								console.log(modelInputFile);
 							}}
-							accept=".gguf",".safetensors"
+							accept=".gguf,.safetensors"
 							required
 							hidden
 						/>

From bbf9deabf017494845ebfd62fb01315e6ad35d18 Mon Sep 17 00:00:00 2001
From: Jannik S <69747628+jannikstdl@users.noreply.github.com>
Date: Thu, 7 Mar 2024 11:41:23 +0100
Subject: [PATCH 03/41] fix: show latest changes in releases

fix for #1048
---
 .github/workflows/build-release.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml
index e45a8316..259f0c5f 100644
--- a/.github/workflows/build-release.yml
+++ b/.github/workflows/build-release.yml
@@ -29,11 +29,11 @@ jobs:
       - name: Extract latest CHANGELOG entry
         id: changelog
         run: |
-          CHANGELOG_CONTENT=$(awk '/^## \[/{n++} n==1' CHANGELOG.md)
-          echo "CHANGELOG_CONTENT<

Date: Thu, 7 Mar 2024 15:13:51 +0100
Subject: [PATCH 04/41] Update Dockerfile

Tiny typo.
---
 Dockerfile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 99fd0e61..de501838 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -41,7 +41,7 @@ ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
 # for better persormance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
 # IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
 ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
-# device type for whisper tts and ebbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
+# device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
 ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu"
 ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
 ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
@@ -81,4 +81,4 @@ COPY --from=build /app/package.json /app/package.json
 
 # copy backend files
 COPY ./backend .
-CMD [ "bash", "start.sh"] \ No newline at end of file +CMD [ "bash", "start.sh"] From 2b84af878a2bd0deab5423761a48705dcd8cb984 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 8 Mar 2024 13:33:56 -0800 Subject: [PATCH 05/41] refac: litellm --- backend/apps/litellm/main.py | 41 ++++++++++++++++++++++++++++++++++++ backend/main.py | 39 ++-------------------------------- 2 files changed, 43 insertions(+), 37 deletions(-) create mode 100644 backend/apps/litellm/main.py diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py new file mode 100644 index 00000000..21b9e58a --- /dev/null +++ b/backend/apps/litellm/main.py @@ -0,0 +1,41 @@ +from litellm.proxy.proxy_server import ProxyConfig, initialize +from litellm.proxy.proxy_server import app + +from fastapi import FastAPI, Request, Depends, status +from fastapi.responses import JSONResponse +from utils.utils import get_http_authorization_cred, get_current_user +from config import ENV + +proxy_config = ProxyConfig() + + +async def config(): + router, model_list, general_settings = await proxy_config.load_config( + router=None, config_file_path="./data/litellm/config.yaml" + ) + + await initialize(config="./data/litellm/config.yaml", telemetry=False) + + +async def startup(): + await config() + + +@app.on_event("startup") +async def on_startup(): + await startup() + + +@app.middleware("http") +async def auth_middleware(request: Request, call_next): + auth_header = request.headers.get("Authorization", "") + + if ENV != "dev": + try: + user = get_current_user(get_http_authorization_cred(auth_header)) + print(user) + except Exception as e: + return JSONResponse(status_code=400, content={"detail": str(e)}) + + response = await call_next(request) + return response diff --git a/backend/main.py b/backend/main.py index 5f6b4441..9e04ee48 100644 --- a/backend/main.py +++ b/backend/main.py @@ -9,17 +9,14 @@ import requests from fastapi import FastAPI, Request, Depends, status from fastapi.staticfiles import StaticFiles from fastapi import HTTPException -from fastapi.responses import JSONResponse from fastapi.middleware.wsgi import WSGIMiddleware from fastapi.middleware.cors import CORSMiddleware from starlette.exceptions import HTTPException as StarletteHTTPException -from litellm.proxy.proxy_server import ProxyConfig, initialize -from litellm.proxy.proxy_server import app as litellm_app - from apps.ollama.main import app as ollama_app from apps.openai.main import app as openai_app +from apps.litellm.main import app as litellm_app, startup as litellm_app_startup from apps.audio.main import app as audio_app from apps.images.main import app as images_app from apps.rag.main import app as rag_app @@ -29,8 +26,6 @@ from apps.web.main import app as webui_app from config import WEBUI_NAME, ENV, VERSION, CHANGELOG, FRONTEND_BUILD_DIR from constants import ERROR_MESSAGES -from utils.utils import get_http_authorization_cred, get_current_user - class SPAStaticFiles(StaticFiles): async def get_response(self, path: str, scope): @@ -43,21 +38,6 @@ class SPAStaticFiles(StaticFiles): raise ex -proxy_config = ProxyConfig() - - -async def config(): - router, model_list, general_settings = await proxy_config.load_config( - router=None, config_file_path="./data/litellm/config.yaml" - ) - - await initialize(config="./data/litellm/config.yaml", telemetry=False) - - -async def startup(): - await config() - - app = FastAPI(docs_url="/docs" if ENV == "dev" else None, redoc_url=None) origins = ["*"] @@ -73,7 +53,7 @@ app.add_middleware( 
@app.on_event("startup") async def on_startup(): - await startup() + await litellm_app_startup() @app.middleware("http") @@ -86,21 +66,6 @@ async def check_url(request: Request, call_next): return response -@litellm_app.middleware("http") -async def auth_middleware(request: Request, call_next): - auth_header = request.headers.get("Authorization", "") - - if ENV != "dev": - try: - user = get_current_user(get_http_authorization_cred(auth_header)) - print(user) - except Exception as e: - return JSONResponse(status_code=400, content={"detail": str(e)}) - - response = await call_next(request) - return response - - app.mount("/api/v1", webui_app) app.mount("/litellm/api", litellm_app) From 9a83bde7e5d5745d3001542a04d8456088fc4986 Mon Sep 17 00:00:00 2001 From: "Timothy J. Baek" Date: Fri, 8 Mar 2024 13:41:38 -0800 Subject: [PATCH 06/41] feat: max token option for litellm models --- src/lib/apis/litellm/index.ts | 4 +++- .../components/chat/Settings/Models.svelte | 20 ++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/lib/apis/litellm/index.ts b/src/lib/apis/litellm/index.ts index 6466ee35..302e9c4a 100644 --- a/src/lib/apis/litellm/index.ts +++ b/src/lib/apis/litellm/index.ts @@ -77,6 +77,7 @@ type AddLiteLLMModelForm = { api_base: string; api_key: string; rpm: string; + max_tokens: string; }; export const addLiteLLMModel = async (token: string = '', payload: AddLiteLLMModelForm) => { @@ -95,7 +96,8 @@ export const addLiteLLMModel = async (token: string = '', payload: AddLiteLLMMod model: payload.model, ...(payload.api_base === '' ? {} : { api_base: payload.api_base }), ...(payload.api_key === '' ? {} : { api_key: payload.api_key }), - ...(isNaN(parseInt(payload.rpm)) ? {} : { rpm: parseInt(payload.rpm) }) + ...(isNaN(parseInt(payload.rpm)) ? {} : { rpm: parseInt(payload.rpm) }), + ...(payload.max_tokens === '' ? {} : { max_tokens: payload.max_tokens }) } }) }) diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte index bdd2cc77..9a079972 100644 --- a/src/lib/components/chat/Settings/Models.svelte +++ b/src/lib/components/chat/Settings/Models.svelte @@ -27,6 +27,7 @@ let liteLLMAPIBase = ''; let liteLLMAPIKey = ''; let liteLLMRPM = ''; + let liteLLMMaxTokens = ''; let deleteLiteLLMModelId = ''; @@ -326,7 +327,8 @@ model: liteLLMModel, api_base: liteLLMAPIBase, api_key: liteLLMAPIKey, - rpm: liteLLMRPM + rpm: liteLLMRPM, + max_tokens: liteLLMMaxTokens }).catch((error) => { toast.error(error); return null; @@ -821,6 +823,22 @@ + +
+
+					<div class="flex flex-col">
+						<div class=" mb-1.5 text-sm font-medium">Max Tokens</div>
+						<div class="flex w-full">
+							<div class="flex-1">
+								<input
+									class="w-full rounded py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-800 outline-none"
+									placeholder="Enter Max Tokens (litellm_params.max_tokens)"
+									bind:value={liteLLMMaxTokens}
+									type="number"
+									min="0"
+									autocomplete="off"
+								/>
+							</div>
+						</div>
+					</div>
 				{/if}

From 9881022b11a6a8cf650e2afcafd716230a1d976b Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Fri, 8 Mar 2024 13:45:21 -0800
Subject: [PATCH 07/41] fix

---
 src/lib/components/chat/Settings/Models.svelte | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/lib/components/chat/Settings/Models.svelte b/src/lib/components/chat/Settings/Models.svelte
index 9a079972..316deb0f 100644
--- a/src/lib/components/chat/Settings/Models.svelte
+++ b/src/lib/components/chat/Settings/Models.svelte
@@ -348,6 +348,7 @@
 			liteLLMAPIBase = '';
 			liteLLMAPIKey = '';
 			liteLLMRPM = '';
+			liteLLMMaxTokens = '';
 
 			liteLLMModelInfo = await getLiteLLMModelInfo(localStorage.token);
 			models.set(await getModels());

From 404aa62a8ae841e2252906df76560d3ce2d34b0e Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Fri, 8 Mar 2024 13:58:56 -0800
Subject: [PATCH 08/41] fix: chat list issues

---
 src/lib/components/chat/Settings/General.svelte |  2 +-
 src/lib/components/layout/Sidebar.svelte        | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/lib/components/chat/Settings/General.svelte b/src/lib/components/chat/Settings/General.svelte
index 567c67a8..8825b790 100644
--- a/src/lib/components/chat/Settings/General.svelte
+++ b/src/lib/components/chat/Settings/General.svelte
@@ -176,7 +176,7 @@
System Prompt