diff --git a/.env.example b/.env.example index 3d2aafc0..05854cd0 100644 --- a/.env.example +++ b/.env.example @@ -9,4 +9,8 @@ OPENAI_API_KEY='' # DO NOT TRACK SCARF_NO_ANALYTICS=true -DO_NOT_TRACK=true \ No newline at end of file +DO_NOT_TRACK=true + +# Use locally bundled version of the LiteLLM cost map json +# to avoid repetitive startup connections +LITELLM_LOCAL_MODEL_COST_MAP="True" diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 259f0c5f..036bb97a 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -57,3 +57,14 @@ jobs: path: . env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Trigger Docker build workflow + uses: actions/github-script@v7 + with: + script: | + github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'docker-build.yaml', + ref: 'v${{ steps.get_version.outputs.version }}', + }) diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index bb71de8b..b3fa778a 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -1,8 +1,8 @@ -# -name: Create and publish a Docker image +name: Create and publish Docker images with specific build args # Configures this workflow to run every time a change is pushed to the branch called `release`. on: + workflow_dispatch: push: branches: - main @@ -23,7 +23,7 @@ jobs: permissions: contents: read packages: write - # + steps: - name: Checkout repository uses: actions/checkout@v4 @@ -41,12 +41,11 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata for Docker images - id: meta + - name: Extract metadata for Docker images (default latest tag) + id: meta-latest uses: docker/metadata-action@v5 with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This configuration dynamically generates tags based on the branch, tag, commit, and custom suffix for lite version. tags: | type=ref,event=branch type=ref,event=tag @@ -56,11 +55,29 @@ jobs: flavor: | latest=${{ github.ref == 'refs/heads/main' }} - - name: Build and push Docker image + - name: Build and push Docker image (latest) uses: docker/build-push-action@v5 with: context: . push: true platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} + tags: ${{ steps.meta-latest.outputs.tags }} + labels: ${{ steps.meta-latest.outputs.labels }} + + - name: Build and push Docker image with CUDA + uses: docker/build-push-action@v5 + with: + context: . + push: true + platforms: linux/amd64,linux/arm64 + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda + build-args: USE_CUDA=true + + - name: Build and push Docker image with Ollama + uses: docker/build-push-action@v5 + with: + context: . 
+          push: true
+          platforms: linux/amd64,linux/arm64
+          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ollama
+          build-args: USE_OLLAMA=true
diff --git a/Dockerfile b/Dockerfile
index 5f0c13cb..4a23d54f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,78 +1,116 @@
 # syntax=docker/dockerfile:1
+# Initialize device type args
+# use build args in the docker build command with --build-arg="BUILDARG=true"
+ARG USE_CUDA=false
+ARG USE_OLLAMA=false
+# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
+ARG USE_CUDA_VER=cu121
+# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you change the default embedding model (all-MiniLM-L6-v2), you will no longer be able to use RAG Chat with documents you previously loaded in the WebUI! You need to re-embed them.
+ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
-FROM node:alpine as build
+######## WebUI frontend ########
+FROM node:21-alpine3.19 as build
 WORKDIR /app
-# wget embedding model weight from alpine (does not exist from slim-buster)
-RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
-    tar -xzf - -C /app
-
 COPY package.json package-lock.json ./
 RUN npm ci
 COPY . .
 RUN npm run build
-
+######## WebUI backend ########
 FROM python:3.11-slim-bookworm as base
-ENV ENV=prod
-ENV PORT ""
+# Use args
+ARG USE_CUDA
+ARG USE_OLLAMA
+ARG USE_CUDA_VER
+ARG USE_EMBEDDING_MODEL
-ENV OLLAMA_BASE_URL "/ollama"
+## Basis ##
+ENV ENV=prod \
+    PORT=8080 \
+    # pass build args to the build
+    USE_OLLAMA_DOCKER=${USE_OLLAMA} \
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
+    USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}
-ENV OPENAI_API_BASE_URL ""
-ENV OPENAI_API_KEY ""
+## Basis URL Config ##
+ENV OLLAMA_BASE_URL="/ollama" \
+    OPENAI_API_BASE_URL=""
-ENV WEBUI_SECRET_KEY ""
-ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER ""
+## API Key and Security Config ##
+ENV OPENAI_API_KEY="" \
+    WEBUI_SECRET_KEY="" \
+    SCARF_NO_ANALYTICS=true \
+    DO_NOT_TRACK=true
-ENV SCARF_NO_ANALYTICS true
-ENV DO_NOT_TRACK true
+# Use locally bundled version of the LiteLLM cost map json
+# to avoid repetitive startup connections
+ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
-######## Preloaded models ########
-# whisper TTS Settings
-ENV WHISPER_MODEL="base"
-ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
-# RAG Embedding Model Settings
-# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
-# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
-# for better persormance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
-# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
-ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" -# device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance -ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" -ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" -ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR +#### Other models ######################################################### +## whisper TTS model settings ## +ENV WHISPER_MODEL="base" \ + WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" -######## Preloaded models ######## +## RAG Embedding model settings ## +ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \ + RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \ + SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" +#### Other models ########################################################## WORKDIR /app/backend - # install python dependencies COPY ./backend/requirements.txt ./requirements.txt -RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y +RUN if [ "$USE_CUDA" = "true" ]; then \ + # If you use CUDA the whisper and embedding modell will be downloaded on first use + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ + else \ + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ + fi -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir -RUN pip3 install -r requirements.txt --no-cache-dir -# Install pandoc and netcat -# RUN python -c "import pypandoc; pypandoc.download_pandoc()" -RUN apt-get update \ - && apt-get install -y pandoc netcat-openbsd \ - && rm -rf /var/lib/apt/lists/* +RUN if [ "$USE_OLLAMA" = "true" ]; then \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # install helper tools + apt-get install -y --no-install-recommends curl && \ + # install ollama + curl -fsSL https://ollama.com/install.sh | sh && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + else \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + fi + -# preload embedding 
model
-RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])"
-# preload tts model
-RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
 # copy embedding weight from build
-RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
-COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
+# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
+# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
 # copy built frontend files
 COPY --from=build /app/build /app/build
@@ -82,4 +120,6 @@ COPY --from=build /app/package.json /app/package.json
 # copy backend files
 COPY ./backend .
-CMD [ "bash", "start.sh"]
+EXPOSE 8080
+
+CMD [ "bash", "start.sh"]
\ No newline at end of file
diff --git a/README.md b/README.md
index e2ee284e..3c0093e7 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,65 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
 - After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
+- **If you want to customize your build with additional args**, use the following commands:
+
+  > [!NOTE]
+  > If you only want to use Open WebUI with Ollama included or with CUDA acceleration, it's recommended to use our official images with the tags `:cuda` or `:ollama`.
+  > If you want a combination of both, or more customization options such as a different embedding model and/or CUDA version, you need to build the image yourself following the instructions below.
+
+  **For the build:**
+
+  ```bash
+  docker build -t open-webui .
+  ```
+
+  Optional build args (append them to the docker build command above as needed):
+
+  e.g.
+
+  ```bash
+  --build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large"
+  ```
+
+  Sets "intfloat/multilingual-e5-large" as a custom embedding model (the default is all-MiniLM-L6-v2); this only works with [sentence transformer models](https://huggingface.co/models?library=sentence-transformers). See the current [leaderboard](https://huggingface.co/spaces/mteb/leaderboard) of embedding models.
+
+  ```bash
+  --build-arg="USE_OLLAMA=true"
+  ```
+
+  Includes Ollama in the image.
+
+  ```bash
+  --build-arg="USE_CUDA=true"
+  ```
+
+  Enables CUDA acceleration for the embedding and whisper models.
+
+  > [!NOTE]
+  > You need to install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your machine so that Docker can use CUDA. This only works on Linux - use WSL for Windows!
+
+  ```bash
+  --build-arg="USE_CUDA_VER=cu117"
+  ```
+
+  Selects CUDA 11 (the default is CUDA 12).
+
+  **To run the image:**
+
+  - **If you DID NOT use the USE_CUDA=true build ARG**, use this command:
+
+    ```bash
+    docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always open-webui
+    ```
+
+  - **If you DID use the USE_CUDA=true build ARG**, use this command:
+
+    ```bash
+    docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always open-webui
+    ```
+
+  - After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
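+
+  For example, to build a single image that combines several of the options above and then run it (an illustrative sketch - keep only the build args you actually need):
+
+  ```bash
+  docker build -t open-webui \
+    --build-arg="USE_OLLAMA=true" \
+    --build-arg="USE_CUDA=true" \
+    --build-arg="USE_CUDA_VER=cu121" \
+    --build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large" .
+
+  docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always open-webui
+  ```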
+
 
 #### Open WebUI: Server Connection Error
 
 If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index bb3cd053..02d1f5e8 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -28,6 +28,7 @@ from config import (
     UPLOAD_DIR,
     WHISPER_MODEL,
     WHISPER_MODEL_DIR,
+    DEVICE_TYPE,
 )
 
 log = logging.getLogger(__name__)
@@ -42,6 +43,10 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+# setting device type for whisper model
+whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
+log.info(f"whisper_device_type: {whisper_device_type}")
+
 
 @app.post("/transcribe")
 def transcribe(
@@ -66,7 +71,7 @@ def transcribe(
 
         model = WhisperModel(
             WHISPER_MODEL,
-            device="auto",
+            device=whisper_device_type,
             compute_type="int8",
             download_root=WHISPER_MODEL_DIR,
         )
diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index b89d7bf5..5e19a8e3 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -215,7 +215,8 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
 
         if len(responses) > 0:
             lowest_version = min(
-                responses, key=lambda x: tuple(map(int, x["version"].split(".")))
+                responses,
+                key=lambda x: tuple(map(int, x["version"].split("-")[0].split("."))),
             )
 
             return {"version": lowest_version["version"]}
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 25e93110..15de0cc4 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -58,8 +58,8 @@ from config import (
     UPLOAD_DIR,
     DOCS_DIR,
     RAG_EMBEDDING_MODEL,
-    RAG_EMBEDDING_MODEL_DEVICE_TYPE,
     RAG_EMBEDDING_MODEL_AUTO_UPDATE,
+    DEVICE_TYPE,
     CHROMA_CLIENT,
     CHUNK_SIZE,
     CHUNK_OVERLAP,
@@ -86,7 +86,7 @@ app.state.TOP_K = 4
 app.state.sentence_transformer_ef = (
     embedding_functions.SentenceTransformerEmbeddingFunction(
         model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
-        device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+        device=DEVICE_TYPE,
     )
 )
 
@@ -154,7 +154,7 @@ async def update_embedding_model(
         app.state.sentence_transformer_ef = (
             embedding_functions.SentenceTransformerEmbeddingFunction(
                 model_name=app.state.RAG_EMBEDDING_MODEL_PATH,
-                device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+                device=DEVICE_TYPE,
             )
         )
     except Exception as e:
@@ -471,25 +471,11 @@ def store_doc(
     log.info(f"file.content_type: {file.content_type}")
     try:
-        is_valid_filename = True
         unsanitized_filename = file.filename
-        if re.search(r'[\\/:"\*\?<>|\n\t ]', unsanitized_filename) is not None:
-            is_valid_filename = False
+        filename = os.path.basename(unsanitized_filename)
 
-        unvalidated_file_path = f"{UPLOAD_DIR}/{unsanitized_filename}"
-        dereferenced_file_path = str(Path(unvalidated_file_path).resolve(strict=False))
-        if not dereferenced_file_path.startswith(UPLOAD_DIR):
-            is_valid_filename = False
+        file_path = f"{UPLOAD_DIR}/{filename}"
 
-        if is_valid_filename:
-            file_path = dereferenced_file_path
-        else:
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail=ERROR_MESSAGES.DEFAULT(),
-            )
-
-        filename = file.filename
         contents = file.file.read()
         with open(file_path, "wb") as f:
             f.write(contents)
@@ -500,7 +486,7 @@ def store_doc(
         collection_name = calculate_sha256(f)[:63]
         f.close()
 
-        loader, known_type =
get_loader(file.filename, file.content_type, file_path) + loader, known_type = get_loader(filename, file.content_type, file_path) data = loader.load() try: diff --git a/backend/apps/web/models/auths.py b/backend/apps/web/models/auths.py index 06986503..a97312ff 100644 --- a/backend/apps/web/models/auths.py +++ b/backend/apps/web/models/auths.py @@ -86,6 +86,7 @@ class SignupForm(BaseModel): name: str email: str password: str + profile_image_url: Optional[str] = "/user.png" class AuthsTable: @@ -94,7 +95,12 @@ class AuthsTable: self.db.create_tables([Auth]) def insert_new_auth( - self, email: str, password: str, name: str, role: str = "pending" + self, + email: str, + password: str, + name: str, + profile_image_url: str = "/user.png", + role: str = "pending", ) -> Optional[UserModel]: log.info("insert_new_auth") @@ -105,7 +111,7 @@ class AuthsTable: ) result = Auth.create(**auth.model_dump()) - user = Users.insert_new_user(id, name, email, role) + user = Users.insert_new_user(id, name, email, profile_image_url, role) if result and user: return user diff --git a/backend/apps/web/models/chats.py b/backend/apps/web/models/chats.py index 95a673cb..ef16ce73 100644 --- a/backend/apps/web/models/chats.py +++ b/backend/apps/web/models/chats.py @@ -206,6 +206,18 @@ class ChatTable: except: return None + def get_chat_by_share_id(self, id: str) -> Optional[ChatModel]: + try: + chat = Chat.get(Chat.share_id == id) + + if chat: + chat = Chat.get(Chat.id == id) + return ChatModel(**model_to_dict(chat)) + else: + return None + except: + return None + def get_chat_by_id_and_user_id(self, id: str, user_id: str) -> Optional[ChatModel]: try: chat = Chat.get(Chat.id == id, Chat.user_id == user_id) diff --git a/backend/apps/web/models/users.py b/backend/apps/web/models/users.py index a01e595e..7d1e182d 100644 --- a/backend/apps/web/models/users.py +++ b/backend/apps/web/models/users.py @@ -31,7 +31,7 @@ class UserModel(BaseModel): name: str email: str role: str = "pending" - profile_image_url: str = "/user.png" + profile_image_url: str timestamp: int # timestamp in epoch api_key: Optional[str] = None @@ -59,7 +59,12 @@ class UsersTable: self.db.create_tables([User]) def insert_new_user( - self, id: str, name: str, email: str, role: str = "pending" + self, + id: str, + name: str, + email: str, + profile_image_url: str = "/user.png", + role: str = "pending", ) -> Optional[UserModel]: user = UserModel( **{ @@ -67,7 +72,7 @@ class UsersTable: "name": name, "email": email, "role": role, - "profile_image_url": "/user.png", + "profile_image_url": profile_image_url, "timestamp": int(time.time()), } ) diff --git a/backend/apps/web/routers/auths.py b/backend/apps/web/routers/auths.py index 293cb55b..89d8c1c8 100644 --- a/backend/apps/web/routers/auths.py +++ b/backend/apps/web/routers/auths.py @@ -163,7 +163,11 @@ async def signup(request: Request, form_data: SignupForm): ) hashed = get_password_hash(form_data.password) user = Auths.insert_new_auth( - form_data.email.lower(), hashed, form_data.name, role + form_data.email.lower(), + hashed, + form_data.name, + form_data.profile_image_url, + role, ) if user: diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py index 660a0d7f..2e2bb5b0 100644 --- a/backend/apps/web/routers/chats.py +++ b/backend/apps/web/routers/chats.py @@ -251,7 +251,15 @@ async def delete_shared_chat_by_id(id: str, user=Depends(get_current_user)): @router.get("/share/{share_id}", response_model=Optional[ChatResponse]) async def get_shared_chat_by_id(share_id: str, 
user=Depends(get_current_user)): - chat = Chats.get_chat_by_id(share_id) + if user.role == "pending": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND + ) + + if user.role == "user": + chat = Chats.get_chat_by_share_id(share_id) + elif user.role == "admin": + chat = Chats.get_chat_by_id(share_id) if chat: return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) diff --git a/backend/apps/web/routers/utils.py b/backend/apps/web/routers/utils.py index ef5717f1..0ee75cfe 100644 --- a/backend/apps/web/routers/utils.py +++ b/backend/apps/web/routers/utils.py @@ -1,16 +1,11 @@ -from fastapi import APIRouter, UploadFile, File, BackgroundTasks +from fastapi import APIRouter, UploadFile, File, Response from fastapi import Depends, HTTPException, status from starlette.responses import StreamingResponse, FileResponse - - from pydantic import BaseModel +from fpdf import FPDF import markdown -import requests -import os -import aiohttp -import json from utils.utils import get_admin_user @@ -18,7 +13,7 @@ from utils.misc import calculate_sha256, get_gravatar_url from config import OLLAMA_BASE_URLS, DATA_DIR, UPLOAD_DIR from constants import ERROR_MESSAGES - +from typing import List router = APIRouter() @@ -41,6 +36,59 @@ async def get_html_from_markdown( return {"html": markdown.markdown(form_data.md)} +class ChatForm(BaseModel): + title: str + messages: List[dict] + + +@router.post("/pdf") +async def download_chat_as_pdf( + form_data: ChatForm, +): + pdf = FPDF() + pdf.add_page() + + STATIC_DIR = "./static" + FONTS_DIR = f"{STATIC_DIR}/fonts" + + pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf") + pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf") + pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf") + pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf") + pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf") + + pdf.set_font("NotoSans", size=12) + pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP"]) + + pdf.set_auto_page_break(auto=True, margin=15) + + # Adjust the effective page width for multi_cell + effective_page_width = ( + pdf.w - 2 * pdf.l_margin - 10 + ) # Subtracted an additional 10 for extra padding + + # Add chat messages + for message in form_data.messages: + role = message["role"] + content = message["content"] + pdf.set_font("NotoSans", "B", size=14) # Bold for the role + pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L") + pdf.ln(1) # Extra space between messages + + pdf.set_font("NotoSans", size=10) # Regular for content + pdf.multi_cell(effective_page_width, 6, content, 0, "L") + pdf.ln(1.5) # Extra space between messages + + # Save the pdf with name .pdf + pdf_bytes = pdf.output() + + return Response( + content=bytes(pdf_bytes), + media_type="application/pdf", + headers={"Content-Disposition": f"attachment;filename=chat.pdf"}, + ) + + @router.get("/db/download") async def download_db(user=Depends(get_admin_user)): diff --git a/backend/config.py b/backend/config.py index b3299a97..364f498e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -257,6 +257,7 @@ OLLAMA_API_BASE_URL = os.environ.get( OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") K8S_FLAG = os.environ.get("K8S_FLAG", "") +USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false") if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": OLLAMA_BASE_URL = ( @@ -266,9 +267,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": ) if ENV == 
"prod": - if OLLAMA_BASE_URL == "/ollama": - OLLAMA_BASE_URL = "http://host.docker.internal:11434" - + if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG: + if USE_OLLAMA_DOCKER.lower() == "true": + # if you use all-in-one docker container (Open WebUI + Ollama) + # with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434 + OLLAMA_BASE_URL = "http://localhost:11434" + else: + OLLAMA_BASE_URL = "http://host.docker.internal:11434" elif K8S_FLAG: OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434" @@ -391,13 +396,21 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" # this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") -# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance -RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get( - "RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu" -) +log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"), RAG_EMBEDDING_MODEL_AUTO_UPDATE = False if os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true": RAG_EMBEDDING_MODEL_AUTO_UPDATE = True + + +# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance +USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false") + +if USE_CUDA.lower() == "true": + DEVICE_TYPE = "cuda" +else: + DEVICE_TYPE = "cpu" + + CHROMA_CLIENT = chromadb.PersistentClient( path=CHROMA_DATA_PATH, settings=Settings(allow_reset=True, anonymized_telemetry=False), diff --git a/backend/requirements.txt b/backend/requirements.txt index 66f3ee0f..c815d93d 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -42,6 +42,8 @@ xlrd opencv-python-headless rapidocr-onnxruntime +fpdf2 + faster-whisper PyJWT diff --git a/backend/start.sh b/backend/start.sh index f9ed5948..06adf1ff 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key PORT="${PORT:-8080}" if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then - echo No WEBUI_SECRET_KEY provided + echo "No WEBUI_SECRET_KEY provided" if ! [ -e "$KEY_FILE" ]; then - echo Generating WEBUI_SECRET_KEY + echo "Generating WEBUI_SECRET_KEY" # Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one. - echo $(head -c 12 /dev/random | base64) > $KEY_FILE + echo $(head -c 12 /dev/random | base64) > "$KEY_FILE" fi - echo Loading WEBUI_SECRET_KEY from $KEY_FILE - WEBUI_SECRET_KEY=`cat $KEY_FILE` + echo "Loading WEBUI_SECRET_KEY from $KEY_FILE" + WEBUI_SECRET_KEY=$(cat "$KEY_FILE") fi -WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' \ No newline at end of file +if [ "$USE_OLLAMA_DOCKER" = "true" ]; then + echo "USE_OLLAMA is set to true, starting ollama serve." + ollama serve & +fi + +if [ "$USE_CUDA_DOCKER" = "true" ]; then + echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries." 
+ export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib" +fi + +WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' diff --git a/backend/static/fonts/NotoSans-Bold.ttf b/backend/static/fonts/NotoSans-Bold.ttf new file mode 100644 index 00000000..d84248ed Binary files /dev/null and b/backend/static/fonts/NotoSans-Bold.ttf differ diff --git a/backend/static/fonts/NotoSans-Italic.ttf b/backend/static/fonts/NotoSans-Italic.ttf new file mode 100644 index 00000000..c40c3562 Binary files /dev/null and b/backend/static/fonts/NotoSans-Italic.ttf differ diff --git a/backend/static/fonts/NotoSans-Regular.ttf b/backend/static/fonts/NotoSans-Regular.ttf new file mode 100644 index 00000000..fa4cff50 Binary files /dev/null and b/backend/static/fonts/NotoSans-Regular.ttf differ diff --git a/backend/static/fonts/NotoSansJP-Regular.ttf b/backend/static/fonts/NotoSansJP-Regular.ttf new file mode 100644 index 00000000..1583096a Binary files /dev/null and b/backend/static/fonts/NotoSansJP-Regular.ttf differ diff --git a/backend/static/fonts/NotoSansKR-Regular.ttf b/backend/static/fonts/NotoSansKR-Regular.ttf new file mode 100644 index 00000000..1b14d324 Binary files /dev/null and b/backend/static/fonts/NotoSansKR-Regular.ttf differ diff --git a/docker-compose.amdgpu.yaml b/docker-compose.amdgpu.yaml new file mode 100644 index 00000000..7a1295d9 --- /dev/null +++ b/docker-compose.amdgpu.yaml @@ -0,0 +1,8 @@ +services: + ollama: + devices: + - /dev/kfd:/dev/kfd + - /dev/dri:/dev/dri + image: ollama/ollama:${OLLAMA_DOCKER_TAG-rocm} + environment: + - 'HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION-11.0.0}' \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index f69084b8..9daba312 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -8,7 +8,7 @@ services: pull_policy: always tty: true restart: unless-stopped - image: ollama/ollama:latest + image: ollama/ollama:${OLLAMA_DOCKER_TAG-latest} open-webui: build: @@ -16,7 +16,7 @@ services: args: OLLAMA_BASE_URL: '/ollama' dockerfile: Dockerfile - image: ghcr.io/open-webui/open-webui:main + image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} container_name: open-webui volumes: - open-webui:/app/backend/data diff --git a/kubernetes/helm/templates/_helpers.tpl b/kubernetes/helm/templates/_helpers.tpl index 0647a42a..3f42735a 100644 --- a/kubernetes/helm/templates/_helpers.tpl +++ b/kubernetes/helm/templates/_helpers.tpl @@ -7,7 +7,7 @@ ollama {{- end -}} {{- define "ollama.url" -}} -{{- printf "http://%s.%s.svc.cluster.local:%d/api" (include "ollama.name" .) (.Release.Namespace) (.Values.ollama.service.port | int) }} +{{- printf "http://%s.%s.svc.cluster.local:%d/" (include "ollama.name" .) 
(.Release.Namespace) (.Values.ollama.service.port | int) }} {{- end }} {{- define "chart.name" -}} diff --git a/src/lib/apis/auths/index.ts b/src/lib/apis/auths/index.ts index 548a9418..efeeff33 100644 --- a/src/lib/apis/auths/index.ts +++ b/src/lib/apis/auths/index.ts @@ -58,7 +58,12 @@ export const userSignIn = async (email: string, password: string) => { return res; }; -export const userSignUp = async (name: string, email: string, password: string) => { +export const userSignUp = async ( + name: string, + email: string, + password: string, + profile_image_url: string +) => { let error = null; const res = await fetch(`${WEBUI_API_BASE_URL}/auths/signup`, { @@ -69,7 +74,8 @@ export const userSignUp = async (name: string, email: string, password: string) body: JSON.stringify({ name: name, email: email, - password: password + password: password, + profile_image_url: profile_image_url }) }) .then(async (res) => { diff --git a/src/lib/apis/utils/index.ts b/src/lib/apis/utils/index.ts index 53e93688..ef6b0d25 100644 --- a/src/lib/apis/utils/index.ts +++ b/src/lib/apis/utils/index.ts @@ -22,6 +22,32 @@ export const getGravatarUrl = async (email: string) => { return res; }; +export const downloadChatAsPDF = async (chat: object) => { + let error = null; + + const blob = await fetch(`${WEBUI_API_BASE_URL}/utils/pdf`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + title: chat.title, + messages: chat.messages + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.blob(); + }) + .catch((err) => { + console.log(err); + error = err; + return null; + }); + + return blob; +}; + export const getHTMLFromMarkdown = async (md: string) => { let error = null; diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index a23649d8..eff65a25 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -295,6 +295,13 @@ const dropZone = document.querySelector('body'); + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + console.log('Escape'); + dragged = false; + } + }; + const onDragOver = (e) => { e.preventDefault(); dragged = true; @@ -350,11 +357,15 @@ dragged = false; }; + window.addEventListener('keydown', handleKeyDown); + dropZone?.addEventListener('dragover', onDragOver); dropZone?.addEventListener('drop', onDrop); dropZone?.addEventListener('dragleave', onDragLeave); return () => { + window.removeEventListener('keydown', handleKeyDown); + dropZone?.removeEventListener('dragover', onDragOver); dropZone?.removeEventListener('drop', onDrop); dropZone?.removeEventListener('dragleave', onDragLeave); diff --git a/src/lib/components/chat/Messages.svelte b/src/lib/components/chat/Messages.svelte index 4cd97ca8..89277766 100644 --- a/src/lib/components/chat/Messages.svelte +++ b/src/lib/components/chat/Messages.svelte @@ -107,12 +107,8 @@ await sendPrompt(userPrompt, userMessageId, chatId); }; - const confirmEditResponseMessage = async (messageId, content) => { - history.messages[messageId].originalContent = history.messages[messageId].content; - history.messages[messageId].content = content; - + const updateChatMessages = async () => { await tick(); - await updateChatById(localStorage.token, chatId, { messages: messages, history: history @@ -121,15 +117,20 @@ await chats.set(await getChatList(localStorage.token)); }; - const rateMessage = async (messageId, rating) => { - 
history.messages[messageId].rating = rating; - await tick(); - await updateChatById(localStorage.token, chatId, { - messages: messages, - history: history - }); + const confirmEditResponseMessage = async (messageId, content) => { + history.messages[messageId].originalContent = history.messages[messageId].content; + history.messages[messageId].content = content; - await chats.set(await getChatList(localStorage.token)); + await updateChatMessages(); + }; + + const rateMessage = async (messageId, rating) => { + history.messages[messageId].annotation = { + ...history.messages[messageId].annotation, + rating: rating + }; + + await updateChatMessages(); }; const showPreviousMessage = async (message) => { @@ -338,6 +339,7 @@ siblings={history.messages[message.parentId]?.childrenIds ?? []} isLastMessage={messageIdx + 1 === messages.length} {readOnly} + {updateChatMessages} {confirmEditResponseMessage} {showPreviousMessage} {showNextMessage} diff --git a/src/lib/components/chat/Messages/RateComment.svelte b/src/lib/components/chat/Messages/RateComment.svelte new file mode 100644 index 00000000..2bdc3d04 --- /dev/null +++ b/src/lib/components/chat/Messages/RateComment.svelte @@ -0,0 +1,117 @@ + + +
+
+
Tell us more:
+ + +
+ + {#if reasons.length > 0} +
+ {#each reasons as reason} + + {/each} +
+ {/if} + +
+