diff --git a/README.md b/README.md
index a1d089e7..466e3af6 100644
--- a/README.md
+++ b/README.md
@@ -219,9 +219,48 @@
 docker rm -f open-webui
 docker pull ghcr.io/open-webui/open-webui:main
 [insert command you used to install]
 ```
 - In the last line, you need to use the very same command you used to install (local install, remote server, etc.)
+### Moving from Ollama WebUI to Open WebUI
+
+With the recent project rename, the Docker image has been renamed as well. If you previously used Ollama WebUI and want to switch to the new image, a few additional steps are required.
+
+#### Updating to Open WebUI without keeping your data
+
+If you want to update to the new image but don't want to keep any previous data such as conversations, prompts, and documents, perform the following steps:
+
+```bash
+docker rm -f ollama-webui
+docker pull ghcr.io/open-webui/open-webui:main
+[insert the equivalent command that you used to install with the new Docker image name]
+docker volume rm ollama-webui
+```
+
+For example, for a local installation this would be `docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main`. For other installation commands, check the relevant parts of this README document.
+
+#### Migrating your contents from Ollama WebUI to Open WebUI
+
+If you want to update to the new image while migrating all your previous data such as conversations, prompts, and documents, perform the following steps:
+
+```bash
+docker rm -f ollama-webui
+docker pull ghcr.io/open-webui/open-webui:main
+# Creates a new volume and uses a temporary container to copy from one volume to another as per https://github.com/moby/moby/issues/31154#issuecomment-360531460
+docker volume create --name open-webui
+docker run --rm -v ollama-webui:/from -v open-webui:/to alpine ash -c "cd /from ; cp -av . /to"
+[insert the equivalent command that you used to install with the new Docker image name]
+```
+
+Once you have verified that all the data has been migrated, you can remove the old volume using the following command:
+
+```bash
+docker volume rm ollama-webui
+```
+
 
 ## How to Install Without Docker
 
 While we strongly recommend using our convenient Docker container installation for optimal support, we understand that some situations may require a non-Docker setup, especially for development purposes. Please note that non-Docker installations are not officially supported, and you might need to troubleshoot on your own.
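Before running the final `docker volume rm ollama-webui` in the migration steps above, it is worth confirming that the copy actually landed in the new volume. A minimal check, assuming the default volume names used in this README (`ollama-webui` for the old data, `open-webui` for the new one):

```bash
# List the contents of the new volume; you should see the database file (e.g. webui.db)
# and data folders such as cache/
docker run --rm -v open-webui:/data alpine ls -la /data

# Optionally compare the overall size of the old and new volumes before deleting the old one
docker run --rm -v ollama-webui:/from -v open-webui:/to alpine du -sh /from /to
```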
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 07a30ade..6d06456f 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -10,6 +10,8 @@ from fastapi import ( ) from fastapi.middleware.cors import CORSMiddleware import os, shutil + +from pathlib import Path from typing import List # from chromadb.utils import embedding_functions @@ -28,19 +30,40 @@ from langchain_community.document_loaders import ( UnstructuredExcelLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain_community.vectorstores import Chroma from langchain.chains import RetrievalQA +from langchain_community.vectorstores import Chroma from pydantic import BaseModel from typing import Optional - +import mimetypes import uuid +import json import time -from utils.misc import calculate_sha256, calculate_sha256_string + +from apps.web.models.documents import ( + Documents, + DocumentForm, + DocumentResponse, +) + +from utils.misc import ( + calculate_sha256, + calculate_sha256_string, + sanitize_filename, + extract_folders_after_data_docs, +) from utils.utils import get_current_user, get_admin_user -from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP +from config import ( + UPLOAD_DIR, + DOCS_DIR, + EMBED_MODEL, + CHROMA_CLIENT, + CHUNK_SIZE, + CHUNK_OVERLAP, + RAG_TEMPLATE, +) from constants import ERROR_MESSAGES # EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction( @@ -49,6 +72,11 @@ from constants import ERROR_MESSAGES app = FastAPI() +app.state.CHUNK_SIZE = CHUNK_SIZE +app.state.CHUNK_OVERLAP = CHUNK_OVERLAP +app.state.RAG_TEMPLATE = RAG_TEMPLATE + + origins = ["*"] app.add_middleware( @@ -70,7 +98,7 @@ class StoreWebForm(CollectionNameForm): def store_data_in_vector_db(data, collection_name) -> bool: text_splitter = RecursiveCharacterTextSplitter( - chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP + chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP ) docs = text_splitter.split_documents(data) @@ -94,7 +122,60 @@ def store_data_in_vector_db(data, collection_name) -> bool: @app.get("/") async def get_status(): - return {"status": True} + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } + + +@app.get("/chunk") +async def get_chunk_params(user=Depends(get_admin_user)): + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } + + +class ChunkParamUpdateForm(BaseModel): + chunk_size: int + chunk_overlap: int + + +@app.post("/chunk/update") +async def update_chunk_params( + form_data: ChunkParamUpdateForm, user=Depends(get_admin_user) +): + app.state.CHUNK_SIZE = form_data.chunk_size + app.state.CHUNK_OVERLAP = form_data.chunk_overlap + + return { + "status": True, + "chunk_size": app.state.CHUNK_SIZE, + "chunk_overlap": app.state.CHUNK_OVERLAP, + } + + +@app.get("/template") +async def get_rag_template(user=Depends(get_current_user)): + return { + "status": True, + "template": app.state.RAG_TEMPLATE, + } + + +class RAGTemplateForm(BaseModel): + template: str + + +@app.post("/template/update") +async def update_rag_template(form_data: RAGTemplateForm, user=Depends(get_admin_user)): + # TODO: check template requirements + app.state.RAG_TEMPLATE = ( + form_data.template if form_data.template != "" else RAG_TEMPLATE + ) + return {"status": True, "template": app.state.RAG_TEMPLATE} class QueryDocForm(BaseModel): @@ -220,8 +301,8 @@ def store_web(form_data: 
StoreWebForm, user=Depends(get_current_user)): ) -def get_loader(file, file_path): - file_ext = file.filename.split(".")[-1].lower() +def get_loader(filename: str, file_content_type: str, file_path: str): + file_ext = filename.split(".")[-1].lower() known_type = True known_source_ext = [ @@ -279,20 +360,20 @@ def get_loader(file, file_path): loader = UnstructuredXMLLoader(file_path) elif file_ext == "md": loader = UnstructuredMarkdownLoader(file_path) - elif file.content_type == "application/epub+zip": + elif file_content_type == "application/epub+zip": loader = UnstructuredEPubLoader(file_path) elif ( - file.content_type + file_content_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or file_ext in ["doc", "docx"] ): loader = Docx2txtLoader(file_path) - elif file.content_type in [ + elif file_content_type in [ "application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ] or file_ext in ["xls", "xlsx"]: loader = UnstructuredExcelLoader(file_path) - elif file_ext in known_source_ext or file.content_type.find("text/") >= 0: + elif file_ext in known_source_ext or file_content_type.find("text/") >= 0: loader = TextLoader(file_path) else: loader = TextLoader(file_path) @@ -323,7 +404,7 @@ def store_doc( collection_name = calculate_sha256(f)[:63] f.close() - loader, known_type = get_loader(file, file_path) + loader, known_type = get_loader(file.filename, file.content_type, file_path) data = loader.load() result = store_data_in_vector_db(data, collection_name) @@ -353,6 +434,63 @@ def store_doc( ) +@app.get("/scan") +def scan_docs_dir(user=Depends(get_admin_user)): + try: + for path in Path(DOCS_DIR).rglob("./**/*"): + if path.is_file() and not path.name.startswith("."): + tags = extract_folders_after_data_docs(path) + filename = path.name + file_content_type = mimetypes.guess_type(path) + + f = open(path, "rb") + collection_name = calculate_sha256(f)[:63] + f.close() + + loader, known_type = get_loader( + filename, file_content_type[0], str(path) + ) + data = loader.load() + + result = store_data_in_vector_db(data, collection_name) + + if result: + sanitized_filename = sanitize_filename(filename) + doc = Documents.get_doc_by_name(sanitized_filename) + + if doc == None: + doc = Documents.insert_new_doc( + user.id, + DocumentForm( + **{ + "name": sanitized_filename, + "title": filename, + "collection_name": collection_name, + "filename": filename, + "content": ( + json.dumps( + { + "tags": list( + map( + lambda name: {"name": name}, + tags, + ) + ) + } + ) + if len(tags) + else "{}" + ), + } + ), + ) + + except Exception as e: + print(e) + + return True + + @app.get("/reset/db") def reset_vector_db(user=Depends(get_admin_user)): CHROMA_CLIENT.reset() diff --git a/backend/apps/web/routers/documents.py b/backend/apps/web/routers/documents.py index 5bc473fa..7c69514f 100644 --- a/backend/apps/web/routers/documents.py +++ b/backend/apps/web/routers/documents.py @@ -96,6 +96,10 @@ async def get_doc_by_name(name: str, user=Depends(get_current_user)): ############################ +class TagItem(BaseModel): + name: str + + class TagDocumentForm(BaseModel): name: str tags: List[dict] diff --git a/backend/config.py b/backend/config.py index d7c89b3b..440256c4 100644 --- a/backend/config.py +++ b/backend/config.py @@ -43,6 +43,14 @@ Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True) CACHE_DIR = f"{DATA_DIR}/cache" Path(CACHE_DIR).mkdir(parents=True, exist_ok=True) + +#################################### +# Docs DIR 
+####################################
+
+DOCS_DIR = f"{DATA_DIR}/docs"
+Path(DOCS_DIR).mkdir(parents=True, exist_ok=True)
+
 ####################################
 # OLLAMA_API_BASE_URL
 ####################################
@@ -136,6 +144,21 @@ CHROMA_CLIENT = chromadb.PersistentClient(
 CHUNK_SIZE = 1500
 CHUNK_OVERLAP = 100
+
+RAG_TEMPLATE = """Use the following context as your learned knowledge, inside <context></context> XML tags.
+<context>
+    [context]
+</context>
+
+When answering the user:
+- If you don't know, just say that you don't know.
+- If you are not sure, ask for clarification.
+Avoid mentioning that you obtained the information from the context.
+Answer according to the language of the user's question.
+
+Given the context information, answer the query.
+Query: [query]"""
+
 
 ####################################
 # Transcribe
 ####################################
diff --git a/backend/start_windows.bat b/backend/start_windows.bat
new file mode 100644
index 00000000..b2c37017
--- /dev/null
+++ b/backend/start_windows.bat
@@ -0,0 +1,32 @@
+:: This method is not recommended; we recommend using the `start.sh` script with WSL instead.
+@echo off
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+:: Get the directory of the current script
+SET "SCRIPT_DIR=%~dp0"
+cd /d "%SCRIPT_DIR%" || exit /b
+
+SET "KEY_FILE=.webui_secret_key"
+IF "%PORT%" == "" SET "PORT=8080"
+SET "WEBUI_SECRET_KEY=%WEBUI_SECRET_KEY%"
+SET "WEBUI_JWT_SECRET_KEY=%WEBUI_JWT_SECRET_KEY%"
+
+:: Check if WEBUI_SECRET_KEY and WEBUI_JWT_SECRET_KEY are not set
+IF "%WEBUI_SECRET_KEY%%WEBUI_JWT_SECRET_KEY%" == "" (
+    echo No WEBUI_SECRET_KEY provided
+
+    IF NOT EXIST "%KEY_FILE%" (
+        echo Generating WEBUI_SECRET_KEY
+        :: Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one
+        >"%KEY_FILE%" echo !RANDOM!!RANDOM!!RANDOM!!RANDOM!
+        echo WEBUI_SECRET_KEY generated
+    )
+
+    echo Loading WEBUI_SECRET_KEY from %KEY_FILE%
+    SET /p WEBUI_SECRET_KEY=<%KEY_FILE%
+)
+
+:: Execute uvicorn
+SET "WEBUI_SECRET_KEY=%WEBUI_SECRET_KEY%"
+uvicorn main:app --host 0.0.0.0 --port "%PORT%" --forwarded-allow-ips "*"
diff --git a/backend/utils/misc.py b/backend/utils/misc.py
index 385a2c41..5e9d5876 100644
--- a/backend/utils/misc.py
+++ b/backend/utils/misc.py
@@ -1,3 +1,4 @@
+from pathlib import Path
 import hashlib
 import re
 
@@ -38,3 +39,40 @@ def validate_email_format(email: str) -> bool:
     if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
         return False
     return True
+
+
+def sanitize_filename(file_name):
+    # Convert to lowercase
+    lower_case_file_name = file_name.lower()
+
+    # Remove special characters using regular expression
+    sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name)
+
+    # Replace spaces with dashes
+    final_file_name = re.sub(r"\s+", "-", sanitized_file_name)
+
+    return final_file_name
+
+
+def extract_folders_after_data_docs(path):
+    # Convert the path to a Path object if it's not already
+    path = Path(path)
+
+    # Extract parts of the path
+    parts = path.parts
+
+    # Find the index of '/data/docs' in the path
+    try:
+        index_data_docs = parts.index("data") + 1
+        index_docs = parts.index("docs", index_data_docs) + 1
+    except ValueError:
+        return []
+
+    # Exclude the filename and accumulate folder names
+    tags = []
+
+    folders = parts[index_docs:-1]
+    for idx, part in enumerate(folders):
+        tags.append("/".join(folders[: idx + 1]))
+
+    return tags
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index 997dbba1..63781f6c 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -5,11 +5,14 @@ ollama:
   image:
ollama/ollama:latest servicePort: 11434 resources: - limits: + requests: cpu: "2000m" memory: "2Gi" + limits: + cpu: "4000m" + memory: "4Gi" nvidia.com/gpu: "0" - volumeSize: 1Gi + volumeSize: 30Gi nodeSelector: {} tolerations: [] service: @@ -22,16 +25,19 @@ webui: image: ghcr.io/open-webui/open-webui:main servicePort: 8080 resources: - limits: + requests: cpu: "500m" memory: "500Mi" + limits: + cpu: "1000m" + memory: "1Gi" ingress: enabled: true annotations: # Use appropriate annotations for your Ingress controller, e.g., for NGINX: # nginx.ingress.kubernetes.io/rewrite-target: / host: open-webui.minikube.local - volumeSize: 1Gi + volumeSize: 2Gi nodeSelector: {} tolerations: [] service: diff --git a/kubernetes/manifest/base/ollama-statefulset.yaml b/kubernetes/manifest/base/ollama-statefulset.yaml index 125e0c62..cd1144ca 100644 --- a/kubernetes/manifest/base/ollama-statefulset.yaml +++ b/kubernetes/manifest/base/ollama-statefulset.yaml @@ -20,9 +20,13 @@ spec: ports: - containerPort: 11434 resources: - limits: + requests: cpu: "2000m" memory: "2Gi" + limits: + cpu: "4000m" + memory: "4Gi" + nvidia.com/gpu: "0" volumeMounts: - name: ollama-volume mountPath: /root/.ollama @@ -34,4 +38,4 @@ spec: accessModes: [ "ReadWriteOnce" ] resources: requests: - storage: 1Gi \ No newline at end of file + storage: 30Gi \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-deployment.yaml b/kubernetes/manifest/base/webui-deployment.yaml index bd2c2273..174025a9 100644 --- a/kubernetes/manifest/base/webui-deployment.yaml +++ b/kubernetes/manifest/base/webui-deployment.yaml @@ -19,10 +19,20 @@ spec: ports: - containerPort: 8080 resources: - limits: + requests: cpu: "500m" memory: "500Mi" + limits: + cpu: "1000m" + memory: "1Gi" env: - name: OLLAMA_API_BASE_URL value: "http://ollama-service.open-webui.svc.cluster.local:11434/api" - tty: true \ No newline at end of file + tty: true + volumeMounts: + - name: webui-volume + mountPath: /app/backend/data + volumes: + - name: webui-volume + persistentVolumeClaim: + claimName: ollama-webui-pvc \ No newline at end of file diff --git a/kubernetes/manifest/base/webui-pvc.yaml b/kubernetes/manifest/base/webui-pvc.yaml new file mode 100644 index 00000000..285dfeef --- /dev/null +++ b/kubernetes/manifest/base/webui-pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + app: ollama-webui + name: ollama-webui-pvc + namespace: ollama-namespace +spec: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 2Gi \ No newline at end of file diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 3f4f30bf..ed36f014 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -1,5 +1,120 @@ import { RAG_API_BASE_URL } from '$lib/constants'; +export const getChunkParams = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/chunk`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const updateChunkParams = async (token: string, size: number, overlap: number) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/chunk/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer 
${token}` + }, + body: JSON.stringify({ + chunk_size: size, + chunk_overlap: overlap + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +export const getRAGTemplate = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/template`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res?.template ?? ''; +}; + +export const updateRAGTemplate = async (token: string, template: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/template/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + template: template + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const uploadDocToVectorDB = async (token: string, collection_name: string, file: File) => { const data = new FormData(); data.append('file', file); @@ -138,6 +253,32 @@ export const queryCollection = async ( return res; }; +export const scanDocs = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/scan`, { + method: 'GET', + headers: { + Accept: 'application/json', + authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const resetVectorDB = async (token: string) => { let error = null; diff --git a/src/lib/components/chat/MessageInput/PromptCommands.svelte b/src/lib/components/chat/MessageInput/PromptCommands.svelte index 83e38561..04fb4c92 100644 --- a/src/lib/components/chat/MessageInput/PromptCommands.svelte +++ b/src/lib/components/chat/MessageInput/PromptCommands.svelte @@ -2,6 +2,7 @@ import { prompts } from '$lib/stores'; import { findWordIndices } from '$lib/utils'; import { tick } from 'svelte'; + import toast from 'svelte-french-toast'; export let prompt = ''; let selectedCommandIdx = 0; @@ -24,7 +25,18 @@ }; const confirmCommand = async (command) => { - prompt = command.content; + let text = command.content; + + if (command.content.includes('{{CLIPBOARD}}')) { + const clipboardText = await navigator.clipboard.readText().catch((err) => { + toast.error('Failed to read clipboard contents'); + return '{{CLIPBOARD}}'; + }); + + text = command.content.replaceAll('{{CLIPBOARD}}', clipboardText); + } + + prompt = text; const chatInputElement = document.getElementById('chat-textarea'); diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index a0ffc83c..1360a5c9 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -72,13 +72,18 @@ if (message.info) { tooltipInstance = tippy(`#info-${message.id}`, { - content: `token/s: 
${ + content: `response_token/s: ${ `${ Math.round( ((message.info.eval_count ?? 0) / (message.info.eval_duration / 1000000000)) * 100 ) / 100 } tokens` ?? 'N/A' }
+ prompt_token/s: ${ + Math.round( + ((message.info.prompt_eval_count ?? 0) / (message.info.prompt_eval_duration / 1000000000)) * 100 + ) / 100 ?? 'N/A' + } tokens
total_duration: ${ Math.round(((message.info.total_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' @@ -366,7 +371,7 @@ {#if message.done}
{#if siblings.length > 1}
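The chunk-size and RAG-template settings added in `backend/apps/rag/main.py` above (and wrapped by `getChunkParams`, `updateChunkParams`, `getRAGTemplate`, and `updateRAGTemplate` in `src/lib/apis/rag/index.ts`) can also be exercised directly against the backend. A minimal sketch, assuming the server is reachable at `http://localhost:8080`, that the RAG router is mounted under `/rag/api/v1` (the frontend's `RAG_API_BASE_URL`), and that `$TOKEN` holds a valid admin token:

```bash
# Read the current chunking parameters (admin only)
curl -s -H "Authorization: Bearer $TOKEN" \
  http://localhost:8080/rag/api/v1/chunk

# Update chunk size/overlap; the new values apply to subsequent document uploads
curl -s -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
  -d '{"chunk_size": 1500, "chunk_overlap": 100}' \
  http://localhost:8080/rag/api/v1/chunk/update

# Read or update the RAG prompt template; an empty template resets it to the default
curl -s -H "Authorization: Bearer $TOKEN" http://localhost:8080/rag/api/v1/template
curl -s -X POST -H "Authorization: Bearer $TOKEN" -H "Content-Type: application/json" \
  -d '{"template": ""}' \
  http://localhost:8080/rag/api/v1/template/update
```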
diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte new file mode 100644 index 00000000..503cbc84 --- /dev/null +++ b/src/lib/components/documents/Settings/General.svelte @@ -0,0 +1,178 @@ + + +
+[Markup omitted: the new component renders the documents "General Settings" panel. The form's submit handler calls submitHandler() and then saveHandler(); the panel offers a "Scan for documents from '/data/docs'" action, a "Chunk Params" group with "Chunk Size" and "Chunk Overlap" inputs, and a "RAG Template" section.]
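Taken together, the backend pieces above enable a simple bulk-import flow: drop files under the new docs directory, trigger a scan, and folder names become document tags. A rough sketch, assuming the backend runs from `backend/` with the default `DATA_DIR` (so `DOCS_DIR` resolves to `backend/data/docs`), the same `/rag/api/v1` prefix as before, and `$TOKEN` holding an admin token:

```bash
# Place documents under data/docs; subfolders turn into tags
# (extract_folders_after_data_docs tags this file as "manuals" and "manuals/printer")
mkdir -p backend/data/docs/manuals/printer
cp ~/Downloads/printer-guide.pdf backend/data/docs/manuals/printer/

# Index everything found under data/docs and register the documents
curl -s -H "Authorization: Bearer $TOKEN" http://localhost:8080/rag/api/v1/scan
```

Filenames are normalized by `sanitize_filename` (lowercased, special characters removed, spaces replaced with dashes) before being stored as document names.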