diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml index 259f0c5f..036bb97a 100644 --- a/.github/workflows/build-release.yml +++ b/.github/workflows/build-release.yml @@ -57,3 +57,14 @@ jobs: path: . env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Trigger Docker build workflow + uses: actions/github-script@v7 + with: + script: | + github.rest.actions.createWorkflowDispatch({ + owner: context.repo.owner, + repo: context.repo.repo, + workflow_id: 'docker-build.yaml', + ref: 'v${{ steps.get_version.outputs.version }}', + }) diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml index bb71de8b..e14a5d8e 100644 --- a/.github/workflows/docker-build.yaml +++ b/.github/workflows/docker-build.yaml @@ -1,8 +1,7 @@ -# -name: Create and publish a Docker image +name: Create and publish Docker images with specific build args -# Configures this workflow to run every time a change is pushed to the branch called `release`. on: + workflow_dispatch: push: branches: - main @@ -10,30 +9,39 @@ on: tags: - v* -# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds. env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} + FULL_IMAGE_NAME: ghcr.io/${{ github.repository }} -# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu. jobs: - build-and-push-image: + build-main-image: runs-on: ubuntu-latest - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. permissions: contents: read packages: write - # + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + - name: Checkout repository uses: actions/checkout@v4 - # Required for multi architecture build + - name: Set up QEMU uses: docker/setup-qemu-action@v3 - # Required for multi architecture build + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - # Uses the `docker/login-action` action to log in to the Container registry registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here. + - name: Log in to the Container registry uses: docker/login-action@v3 with: @@ -41,12 +49,11 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata for Docker images + - name: Extract metadata for Docker images (default latest tag) id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - # This configuration dynamically generates tags based on the branch, tag, commit, and custom suffix for lite version. + images: ${{ env.FULL_IMAGE_NAME }} tags: | type=ref,event=branch type=ref,event=tag @@ -56,11 +63,322 @@ jobs: flavor: | latest=${{ github.ref == 'refs/heads/main' }} - - name: Build and push Docker image + - name: Build Docker image (latest) uses: docker/build-push-action@v5 + id: build with: context: . 
push: true - platforms: linux/amd64,linux/arm64 - tags: ${{ steps.meta.outputs.tags }} + platforms: ${{ matrix.platform }} labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-main-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + build-cuda-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (default latest tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-cuda,onlatest=true + + - name: Build Docker image (cuda) + uses: docker/build-push-action@v5 + id: build + with: + context: . 
+ push: true + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: USE_CUDA=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-cuda-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + build-ollama-image: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (ollama tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-ollama,onlatest=true + + - name: Build Docker image (ollama) + uses: docker/build-push-action@v5 + id: build + with: + context: . + push: true + platforms: ${{ matrix.platform }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.FULL_IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + cache-from: type=gha + cache-to: type=gha,mode=max + build-args: USE_OLLAMA=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-ollama-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge-main-images: + runs-on: ubuntu-latest + needs: [ build-main-image ] + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + pattern: digests-main-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (default latest tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }} + + + merge-cuda-images: + runs-on: ubuntu-latest + needs: [ build-cuda-image ] + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + pattern: digests-cuda-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (default latest tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=cuda + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-cuda,onlatest=true + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }} + + merge-ollama-images: + runs-on: ubuntu-latest + needs: [ build-ollama-image ] + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + pattern: digests-ollama-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata for Docker images (default ollama tag) + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.FULL_IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=tag + type=sha,prefix=git- + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=raw,enable=${{ github.ref == 'refs/heads/main' }},prefix=,suffix=,value=ollama + flavor: | + latest=${{ github.ref == 'refs/heads/main' }} + suffix=-ollama,onlatest=true + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) | join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.FULL_IMAGE_NAME }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.FULL_IMAGE_NAME }}:${{ steps.meta.outputs.version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index b1fd38b7..34aa2bc1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.118] - 2024-04-10 + +### Added + +- **🦙 Ollama and CUDA Images**: Added support for `:ollama` and `:cuda` tagged images. 
+- **👍 Enhanced Response Rating**: Now you can annotate your ratings for better feedback.
+- **👤 User Initials Profile Photo**: User initials are now the default profile photo.
+- **🔍 Update RAG Embedding Model**: Customize the RAG embedding model directly in document settings.
+- **🌍 Additional Language Support**: Added Turkish language support.
+
+### Fixed
+
+- **🔒 Share Chat Permission**: Resolved issue with chat sharing permissions.
+- **🛠 Modal Close**: Modals can now be closed using the Esc key.
+
+### Changed
+
+- **🎨 Admin Panel Styling**: Refreshed styling for the admin panel.
+- **🐳 Docker Image Build**: Updated the Docker image build process for improved efficiency.
+
 ## [0.1.117] - 2024-04-03

 ### Added

diff --git a/Dockerfile b/Dockerfile
index f76f8c32..6cf8ded2 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,82 +1,116 @@
 # syntax=docker/dockerfile:1
+# Initialize device type args
+# use build args in the docker build command with --build-arg="BUILDARG=true"
+ARG USE_CUDA=false
+ARG USE_OLLAMA=false
+# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
+ARG USE_CUDA_VER=cu121
+# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you switch away from the default embedding model (all-MiniLM-L6-v2), or back to it, RAG Chat will no longer work with documents already loaded in the WebUI; you need to re-embed them.
+ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2

-FROM node:alpine as build
+######## WebUI frontend ########
+FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build

 WORKDIR /app

-# wget embedding model weight from alpine (does not exist from slim-buster)
-RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
-    tar -xzf - -C /app
-
 COPY package.json package-lock.json ./
 RUN npm ci

 COPY . .
RUN npm run build - +######## WebUI backend ######## FROM python:3.11-slim-bookworm as base -ENV ENV=prod -ENV PORT "" +# Use args +ARG USE_CUDA +ARG USE_OLLAMA +ARG USE_CUDA_VER +ARG USE_EMBEDDING_MODEL -ENV OLLAMA_BASE_URL "/ollama" +## Basis ## +ENV ENV=prod \ + PORT=8080 \ + # pass build args to the build + USE_OLLAMA_DOCKER=${USE_OLLAMA} \ + USE_CUDA_DOCKER=${USE_CUDA} \ + USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \ + USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} -ENV OPENAI_API_BASE_URL "" -ENV OPENAI_API_KEY "" +## Basis URL Config ## +ENV OLLAMA_BASE_URL="/ollama" \ + OPENAI_API_BASE_URL="" -ENV WEBUI_SECRET_KEY "" -ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER "" - -ENV SCARF_NO_ANALYTICS true -ENV DO_NOT_TRACK true +## API Key and Security Config ## +ENV OPENAI_API_KEY="" \ + WEBUI_SECRET_KEY="" \ + SCARF_NO_ANALYTICS=true \ + DO_NOT_TRACK=true # Use locally bundled version of the LiteLLM cost map json # to avoid repetitive startup connections ENV LITELLM_LOCAL_MODEL_COST_MAP="True" -######## Preloaded models ######## -# whisper TTS Settings -ENV WHISPER_MODEL="base" -ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" -# RAG Embedding Model Settings -# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers -# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard -# for better persormance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB) -# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them. -ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" -# device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance -ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" -ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" -ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR +#### Other models ######################################################### +## whisper TTS model settings ## +ENV WHISPER_MODEL="base" \ + WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" -######## Preloaded models ######## +## RAG Embedding model settings ## +ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \ + RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \ + SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" +#### Other models ########################################################## WORKDIR /app/backend +RUN if [ "$USE_OLLAMA" = "true" ]; then \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # install helper tools + apt-get install -y --no-install-recommends curl && \ + # install ollama + curl -fsSL https://ollama.com/install.sh | sh && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + else \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + fi + # install python dependencies COPY ./backend/requirements.txt ./requirements.txt -RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y +RUN if [ 
"$USE_CUDA" = "true" ]; then \ + # If you use CUDA the whisper and embedding model will be downloaded on first use + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ + else \ + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ + fi -RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir -RUN pip3 install -r requirements.txt --no-cache-dir -# Install pandoc and netcat -# RUN python -c "import pypandoc; pypandoc.download_pandoc()" -RUN apt-get update \ - && apt-get install -y pandoc netcat-openbsd \ - && rm -rf /var/lib/apt/lists/* - -# preload embedding model -RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])" -# preload tts model -RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" # copy embedding weight from build -RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 -COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx +# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 +# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx # copy built frontend files COPY --from=build /app/build /app/build @@ -86,4 +120,6 @@ COPY --from=build /app/package.json /app/package.json # copy backend files COPY ./backend . -CMD [ "bash", "start.sh"] +EXPOSE 8080 + +CMD [ "bash", "start.sh"] \ No newline at end of file diff --git a/README.md b/README.md index e2ee284e..a5ccb541 100644 --- a/README.md +++ b/README.md @@ -94,24 +94,27 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open ### Quick Start with Docker 🐳 -> [!IMPORTANT] +> [!WARNING] > When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` in your Docker command. This step is crucial as it ensures your database is properly mounted and prevents any loss of data. -- **If Ollama is on your computer**, use this command: +> [!TIP] +> If you wish to utilize Open WebUI with Ollama included or CUDA acceleration, we recommend utilizing our official images tagged with either `:cuda` or `:ollama`. 
To enable CUDA, you must install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your Linux/WSL system.

-  ```bash
-  docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
-  ```
+**If Ollama is on your computer**, use this command:

-- **If Ollama is on a Different Server**, use this command:
+```bash
+docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+```

-- To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:
+**If Ollama is on a Different Server**, use this command:

-  ```bash
-  docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
-  ```
+To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:

-- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
+```bash
+docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+```
+
+After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄

 #### Open WebUI: Server Connection Error
diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index bb3cd053..02d1f5e8 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -28,6 +28,7 @@ from config import (
     UPLOAD_DIR,
     WHISPER_MODEL,
     WHISPER_MODEL_DIR,
+    DEVICE_TYPE,
 )

 log = logging.getLogger(__name__)
@@ -42,6 +43,10 @@ app.add_middleware(
     allow_headers=["*"],
 )

+# set the device type for the Whisper model
+whisper_device_type = DEVICE_TYPE if DEVICE_TYPE == "cuda" else "cpu"
+log.info(f"whisper_device_type: {whisper_device_type}")
+

 @app.post("/transcribe")
 def transcribe(
@@ -66,7 +71,7 @@ def transcribe(

         model = WhisperModel(
             WHISPER_MODEL,
-            device="auto",
+            device=whisper_device_type,
             compute_type="int8",
             download_root=WHISPER_MODEL_DIR,
         )
diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index b89d7bf5..5e19a8e3 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -215,7 +215,8 @@ async def get_ollama_versions(url_idx: Optional[int] = None):

         if len(responses) > 0:
             lowest_version = min(
-                responses, key=lambda x: tuple(map(int, x["version"].split(".")))
+                responses,
+                key=lambda x: tuple(map(int, x["version"].split("-")[0].split("."))),
             )

             return {"version": lowest_version["version"]}
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 671429bb..f03aa4b7 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -13,8 +13,8 @@ import os, shutil, logging, re
 from pathlib import Path
 from typing import List

-from sentence_transformers import SentenceTransformer
 from chromadb.utils import embedding_functions
+from chromadb.utils.batch_utils import create_batches

 from langchain_community.document_loaders import (
     WebBaseLoader,
@@ -45,7 +45,7 @@ from apps.web.models.documents import (
     DocumentResponse,
 )

-from apps.rag.utils import query_doc, query_collection
+from apps.rag.utils import query_doc, query_collection, get_embedding_model_path

 from utils.misc import (
     calculate_sha256,
@@ -59,7 +59,8 @@ from
config import ( UPLOAD_DIR, DOCS_DIR, RAG_EMBEDDING_MODEL, - RAG_EMBEDDING_MODEL_DEVICE_TYPE, + RAG_EMBEDDING_MODEL_AUTO_UPDATE, + DEVICE_TYPE, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP, @@ -71,28 +72,25 @@ from constants import ERROR_MESSAGES log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) -# -# if RAG_EMBEDDING_MODEL: -# sentence_transformer_ef = SentenceTransformer( -# model_name_or_path=RAG_EMBEDDING_MODEL, -# cache_folder=RAG_EMBEDDING_MODEL_DIR, -# device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, -# ) - - app = FastAPI() app.state.PDF_EXTRACT_IMAGES = False app.state.CHUNK_SIZE = CHUNK_SIZE app.state.CHUNK_OVERLAP = CHUNK_OVERLAP app.state.RAG_TEMPLATE = RAG_TEMPLATE + + app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL + + app.state.TOP_K = 4 app.state.sentence_transformer_ef = ( embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=app.state.RAG_EMBEDDING_MODEL, - device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, + model_name=get_embedding_model_path( + app.state.RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE + ), + device=DEVICE_TYPE, ) ) @@ -143,18 +141,33 @@ class EmbeddingModelUpdateForm(BaseModel): async def update_embedding_model( form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user) ): - app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model - app.state.sentence_transformer_ef = ( - embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=app.state.RAG_EMBEDDING_MODEL, - device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, - ) + + log.info( + f"Updating embedding model: {app.state.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) - return { - "status": True, - "embedding_model": app.state.RAG_EMBEDDING_MODEL, - } + try: + sentence_transformer_ef = ( + embedding_functions.SentenceTransformerEmbeddingFunction( + model_name=get_embedding_model_path(form_data.embedding_model, True), + device=DEVICE_TYPE, + ) + ) + + app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model + app.state.sentence_transformer_ef = sentence_transformer_ef + + return { + "status": True, + "embedding_model": app.state.RAG_EMBEDDING_MODEL, + } + + except Exception as e: + log.exception(f"Problem updating embedding model: {e}") + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=ERROR_MESSAGES.DEFAULT(e), + ) @app.get("/config") @@ -341,9 +354,14 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b embedding_function=app.state.sentence_transformer_ef, ) - collection.add( - documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts] - ) + for batch in create_batches( + api=CHROMA_CLIENT, + ids=[str(uuid.uuid1()) for _ in texts], + metadatas=metadatas, + documents=texts, + ): + collection.add(*batch) + return True except Exception as e: log.exception(e) diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py index 7b9e6628..7bbfe0b8 100644 --- a/backend/apps/rag/utils.py +++ b/backend/apps/rag/utils.py @@ -1,6 +1,8 @@ +import os import re import logging from typing import List +from huggingface_hub import snapshot_download from config import SRC_LOG_LEVELS, CHROMA_CLIENT @@ -188,3 +190,43 @@ def rag_messages(docs, messages, template, k, embedding_function): messages[last_user_message_idx] = new_user_message return messages + + +def get_embedding_model_path( + embedding_model: str, update_embedding_model: bool = False +): + # Construct huggingface_hub kwargs with local_files_only to return the snapshot path + cache_dir = os.getenv("SENTENCE_TRANSFORMERS_HOME") + + 
local_files_only = not update_embedding_model + + snapshot_kwargs = { + "cache_dir": cache_dir, + "local_files_only": local_files_only, + } + + log.debug(f"embedding_model: {embedding_model}") + log.debug(f"snapshot_kwargs: {snapshot_kwargs}") + + # Inspiration from upstream sentence_transformers + if ( + os.path.exists(embedding_model) + or ("\\" in embedding_model or embedding_model.count("/") > 1) + and local_files_only + ): + # If fully qualified path exists, return input, else set repo_id + return embedding_model + elif "/" not in embedding_model: + # Set valid repo_id for model short-name + embedding_model = "sentence-transformers" + "/" + embedding_model + + snapshot_kwargs["repo_id"] = embedding_model + + # Attempt to query the huggingface_hub library to determine the local path and/or to update + try: + embedding_model_repo_path = snapshot_download(**snapshot_kwargs) + log.debug(f"embedding_model_repo_path: {embedding_model_repo_path}") + return embedding_model_repo_path + except Exception as e: + log.exception(f"Cannot determine embedding model snapshot path: {e}") + return embedding_model diff --git a/backend/apps/web/models/auths.py b/backend/apps/web/models/auths.py index 06986503..a97312ff 100644 --- a/backend/apps/web/models/auths.py +++ b/backend/apps/web/models/auths.py @@ -86,6 +86,7 @@ class SignupForm(BaseModel): name: str email: str password: str + profile_image_url: Optional[str] = "/user.png" class AuthsTable: @@ -94,7 +95,12 @@ class AuthsTable: self.db.create_tables([Auth]) def insert_new_auth( - self, email: str, password: str, name: str, role: str = "pending" + self, + email: str, + password: str, + name: str, + profile_image_url: str = "/user.png", + role: str = "pending", ) -> Optional[UserModel]: log.info("insert_new_auth") @@ -105,7 +111,7 @@ class AuthsTable: ) result = Auth.create(**auth.model_dump()) - user = Users.insert_new_user(id, name, email, role) + user = Users.insert_new_user(id, name, email, profile_image_url, role) if result and user: return user diff --git a/backend/apps/web/models/chats.py b/backend/apps/web/models/chats.py index 95a673cb..ef16ce73 100644 --- a/backend/apps/web/models/chats.py +++ b/backend/apps/web/models/chats.py @@ -206,6 +206,18 @@ class ChatTable: except: return None + def get_chat_by_share_id(self, id: str) -> Optional[ChatModel]: + try: + chat = Chat.get(Chat.share_id == id) + + if chat: + chat = Chat.get(Chat.id == id) + return ChatModel(**model_to_dict(chat)) + else: + return None + except: + return None + def get_chat_by_id_and_user_id(self, id: str, user_id: str) -> Optional[ChatModel]: try: chat = Chat.get(Chat.id == id, Chat.user_id == user_id) diff --git a/backend/apps/web/models/users.py b/backend/apps/web/models/users.py index a01e595e..7d1e182d 100644 --- a/backend/apps/web/models/users.py +++ b/backend/apps/web/models/users.py @@ -31,7 +31,7 @@ class UserModel(BaseModel): name: str email: str role: str = "pending" - profile_image_url: str = "/user.png" + profile_image_url: str timestamp: int # timestamp in epoch api_key: Optional[str] = None @@ -59,7 +59,12 @@ class UsersTable: self.db.create_tables([User]) def insert_new_user( - self, id: str, name: str, email: str, role: str = "pending" + self, + id: str, + name: str, + email: str, + profile_image_url: str = "/user.png", + role: str = "pending", ) -> Optional[UserModel]: user = UserModel( **{ @@ -67,7 +72,7 @@ class UsersTable: "name": name, "email": email, "role": role, - "profile_image_url": "/user.png", + "profile_image_url": profile_image_url, 
"timestamp": int(time.time()), } ) diff --git a/backend/apps/web/routers/auths.py b/backend/apps/web/routers/auths.py index 293cb55b..89d8c1c8 100644 --- a/backend/apps/web/routers/auths.py +++ b/backend/apps/web/routers/auths.py @@ -163,7 +163,11 @@ async def signup(request: Request, form_data: SignupForm): ) hashed = get_password_hash(form_data.password) user = Auths.insert_new_auth( - form_data.email.lower(), hashed, form_data.name, role + form_data.email.lower(), + hashed, + form_data.name, + form_data.profile_image_url, + role, ) if user: diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py index 660a0d7f..2e2bb5b0 100644 --- a/backend/apps/web/routers/chats.py +++ b/backend/apps/web/routers/chats.py @@ -251,7 +251,15 @@ async def delete_shared_chat_by_id(id: str, user=Depends(get_current_user)): @router.get("/share/{share_id}", response_model=Optional[ChatResponse]) async def get_shared_chat_by_id(share_id: str, user=Depends(get_current_user)): - chat = Chats.get_chat_by_id(share_id) + if user.role == "pending": + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail=ERROR_MESSAGES.NOT_FOUND + ) + + if user.role == "user": + chat = Chats.get_chat_by_share_id(share_id) + elif user.role == "admin": + chat = Chats.get_chat_by_id(share_id) if chat: return ChatResponse(**{**chat.model_dump(), "chat": json.loads(chat.chat)}) diff --git a/backend/config.py b/backend/config.py index 39411d25..6e3cf92a 100644 --- a/backend/config.py +++ b/backend/config.py @@ -28,8 +28,6 @@ except ImportError: WEBUI_NAME = os.environ.get("WEBUI_NAME", "Open WebUI") WEBUI_FAVICON_URL = "https://openwebui.com/favicon.png" -shutil.copyfile("../build/favicon.png", "./static/favicon.png") - #################################### # ENV (dev,test,prod) #################################### @@ -103,6 +101,26 @@ for version in soup.find_all("h2"): CHANGELOG = changelog_json +#################################### +# DATA/FRONTEND BUILD DIR +#################################### + +DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve()) +FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build"))) + +try: + with open(f"{DATA_DIR}/config.json", "r") as f: + CONFIG_DATA = json.load(f) +except: + CONFIG_DATA = {} + +#################################### +# Static DIR +#################################### + +STATIC_DIR = str(Path(os.getenv("STATIC_DIR", "./static")).resolve()) + +shutil.copyfile(f"{FRONTEND_BUILD_DIR}/favicon.png", f"{STATIC_DIR}/favicon.png") #################################### # LOGGING @@ -165,7 +183,7 @@ if CUSTOM_NAME: r = requests.get(url, stream=True) if r.status_code == 200: - with open("./static/favicon.png", "wb") as f: + with open(f"{STATIC_DIR}/favicon.png", "wb") as f: r.raw.decode_content = True shutil.copyfileobj(r.raw, f) @@ -177,18 +195,6 @@ else: if WEBUI_NAME != "Open WebUI": WEBUI_NAME += " (Open WebUI)" -#################################### -# DATA/FRONTEND BUILD DIR -#################################### - -DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve()) -FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build"))) - -try: - with open(f"{DATA_DIR}/config.json", "r") as f: - CONFIG_DATA = json.load(f) -except: - CONFIG_DATA = {} #################################### # File Upload DIR @@ -257,6 +263,7 @@ OLLAMA_API_BASE_URL = os.environ.get( OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") K8S_FLAG = os.environ.get("K8S_FLAG", "") +USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", 
"false") if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": OLLAMA_BASE_URL = ( @@ -266,9 +273,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": ) if ENV == "prod": - if OLLAMA_BASE_URL == "/ollama": - OLLAMA_BASE_URL = "http://host.docker.internal:11434" - + if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG: + if USE_OLLAMA_DOCKER.lower() == "true": + # if you use all-in-one docker container (Open WebUI + Ollama) + # with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434 + OLLAMA_BASE_URL = "http://localhost:11434" + else: + OLLAMA_BASE_URL = "http://host.docker.internal:11434" elif K8S_FLAG: OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434" @@ -391,10 +402,22 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" # this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") -# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance -RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get( - "RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu" +log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"), + +RAG_EMBEDDING_MODEL_AUTO_UPDATE = ( + os.environ.get("RAG_EMBEDDING_MODEL_AUTO_UPDATE", "").lower() == "true" ) + + +# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance +USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false") + +if USE_CUDA.lower() == "true": + DEVICE_TYPE = "cuda" +else: + DEVICE_TYPE = "cpu" + + CHROMA_CLIENT = chromadb.PersistentClient( path=CHROMA_DATA_PATH, settings=Settings(allow_reset=True, anonymized_telemetry=False), diff --git a/backend/main.py b/backend/main.py index f574e7ba..d63847bc 100644 --- a/backend/main.py +++ b/backend/main.py @@ -5,6 +5,7 @@ import time import os import sys import logging +import aiohttp import requests from fastapi import FastAPI, Request, Depends, status @@ -18,6 +19,7 @@ from starlette.middleware.base import BaseHTTPMiddleware from apps.ollama.main import app as ollama_app from apps.openai.main import app as openai_app + from apps.litellm.main import app as litellm_app, startup as litellm_app_startup from apps.audio.main import app as audio_app from apps.images.main import app as images_app @@ -38,6 +40,8 @@ from config import ( VERSION, CHANGELOG, FRONTEND_BUILD_DIR, + CACHE_DIR, + STATIC_DIR, MODEL_FILTER_ENABLED, MODEL_FILTER_LIST, GLOBAL_LOG_LEVEL, @@ -269,14 +273,16 @@ async def get_app_changelog(): @app.get("/api/version/updates") async def get_app_latest_release_version(): try: - response = requests.get( - f"https://api.github.com/repos/open-webui/open-webui/releases/latest" - ) - response.raise_for_status() - latest_version = response.json()["tag_name"] + async with aiohttp.ClientSession() as session: + async with session.get( + "https://api.github.com/repos/open-webui/open-webui/releases/latest" + ) as response: + response.raise_for_status() + data = await response.json() + latest_version = data["tag_name"] - return {"current": VERSION, "latest": latest_version[1:]} - except Exception as e: + return {"current": VERSION, "latest": latest_version[1:]} + except aiohttp.ClientError as e: raise HTTPException( 
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail=ERROR_MESSAGES.RATE_LIMIT_EXCEEDED, @@ -297,9 +303,8 @@ async def get_manifest_json(): } -app.mount("/static", StaticFiles(directory="static"), name="static") -app.mount("/cache", StaticFiles(directory="data/cache"), name="cache") - +app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") +app.mount("/cache", StaticFiles(directory=CACHE_DIR), name="cache") app.mount( "/", diff --git a/backend/start.sh b/backend/start.sh index f9ed5948..06adf1ff 100755 --- a/backend/start.sh +++ b/backend/start.sh @@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key PORT="${PORT:-8080}" if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then - echo No WEBUI_SECRET_KEY provided + echo "No WEBUI_SECRET_KEY provided" if ! [ -e "$KEY_FILE" ]; then - echo Generating WEBUI_SECRET_KEY + echo "Generating WEBUI_SECRET_KEY" # Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one. - echo $(head -c 12 /dev/random | base64) > $KEY_FILE + echo $(head -c 12 /dev/random | base64) > "$KEY_FILE" fi - echo Loading WEBUI_SECRET_KEY from $KEY_FILE - WEBUI_SECRET_KEY=`cat $KEY_FILE` + echo "Loading WEBUI_SECRET_KEY from $KEY_FILE" + WEBUI_SECRET_KEY=$(cat "$KEY_FILE") fi -WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' \ No newline at end of file +if [ "$USE_OLLAMA_DOCKER" = "true" ]; then + echo "USE_OLLAMA is set to true, starting ollama serve." + ollama serve & +fi + +if [ "$USE_CUDA_DOCKER" = "true" ]; then + echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries." + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib" +fi + +WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' diff --git a/docker-compose.amdgpu.yaml b/docker-compose.amdgpu.yaml new file mode 100644 index 00000000..7a1295d9 --- /dev/null +++ b/docker-compose.amdgpu.yaml @@ -0,0 +1,8 @@ +services: + ollama: + devices: + - /dev/kfd:/dev/kfd + - /dev/dri:/dev/dri + image: ollama/ollama:${OLLAMA_DOCKER_TAG-rocm} + environment: + - 'HSA_OVERRIDE_GFX_VERSION=${HSA_OVERRIDE_GFX_VERSION-11.0.0}' \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index f69084b8..9daba312 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -8,7 +8,7 @@ services: pull_policy: always tty: true restart: unless-stopped - image: ollama/ollama:latest + image: ollama/ollama:${OLLAMA_DOCKER_TAG-latest} open-webui: build: @@ -16,7 +16,7 @@ services: args: OLLAMA_BASE_URL: '/ollama' dockerfile: Dockerfile - image: ghcr.io/open-webui/open-webui:main + image: ghcr.io/open-webui/open-webui:${WEBUI_DOCKER_TAG-main} container_name: open-webui volumes: - open-webui:/app/backend/data diff --git a/kubernetes/helm/templates/_helpers.tpl b/kubernetes/helm/templates/_helpers.tpl index 3f42735a..0f514523 100644 --- a/kubernetes/helm/templates/_helpers.tpl +++ b/kubernetes/helm/templates/_helpers.tpl @@ -7,8 +7,12 @@ ollama {{- end -}} {{- define "ollama.url" -}} +{{- if .Values.ollama.externalHost }} +{{- printf .Values.ollama.externalHost }} +{{- else }} {{- printf "http://%s.%s.svc.cluster.local:%d/" (include "ollama.name" .) 
(.Release.Namespace) (.Values.ollama.service.port | int) }} {{- end }} +{{- end }} {{- define "chart.name" -}} {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} diff --git a/kubernetes/helm/templates/ollama-service.yaml b/kubernetes/helm/templates/ollama-service.yaml index becb6ad2..32c93cae 100644 --- a/kubernetes/helm/templates/ollama-service.yaml +++ b/kubernetes/helm/templates/ollama-service.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.ollama.externalHost }} apiVersion: v1 kind: Service metadata: @@ -19,3 +20,4 @@ spec: port: {{ .port }} targetPort: http {{- end }} +{{- end }} diff --git a/kubernetes/helm/templates/ollama-statefulset.yaml b/kubernetes/helm/templates/ollama-statefulset.yaml index c348b04c..2750956a 100644 --- a/kubernetes/helm/templates/ollama-statefulset.yaml +++ b/kubernetes/helm/templates/ollama-statefulset.yaml @@ -1,3 +1,4 @@ +{{- if not .Values.ollama.externalHost }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -94,3 +95,4 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} {{- end }} +{{- end }} diff --git a/kubernetes/helm/templates/webui-pvc.yaml b/kubernetes/helm/templates/webui-pvc.yaml index f06454e7..87833245 100644 --- a/kubernetes/helm/templates/webui-pvc.yaml +++ b/kubernetes/helm/templates/webui-pvc.yaml @@ -17,7 +17,9 @@ spec: resources: requests: storage: {{ .Values.webui.persistence.size }} + {{- if .Values.webui.persistence.storageClass }} storageClassName: {{ .Values.webui.persistence.storageClass }} + {{- end }} {{- with .Values.webui.persistence.selector }} selector: {{- toYaml . | nindent 4 }} diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml index 394e5a49..4437973e 100644 --- a/kubernetes/helm/values.yaml +++ b/kubernetes/helm/values.yaml @@ -1,6 +1,7 @@ nameOverride: "" ollama: + externalHost: "" annotations: {} podAnnotations: {} replicaCount: 1 diff --git a/package-lock.json b/package-lock.json index cf149685..7c117e8a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "open-webui", - "version": "0.1.117", + "version": "0.1.118", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.1.117", + "version": "0.1.118", "dependencies": { "@sveltejs/adapter-node": "^1.3.1", "async": "^3.2.5", @@ -5688,9 +5688,9 @@ } }, "node_modules/undici": { - "version": "5.28.3", - "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.3.tgz", - "integrity": "sha512-3ItfzbrhDlINjaP0duwnNsKpDQk3acHI3gVJ1z4fmwMK31k5G9OVIAMLSIaP6w4FaGkaAkN6zaQO9LUvZ1t7VA==", + "version": "5.28.4", + "resolved": "https://registry.npmjs.org/undici/-/undici-5.28.4.tgz", + "integrity": "sha512-72RFADWFqKmUb2hmmvNODKL3p9hcB6Gt2DOQMis1SEBaV6a4MH8soBvzg+95CYhCKPFedut2JY9bMfrDl9D23g==", "dependencies": { "@fastify/busboy": "^2.0.0" }, diff --git a/package.json b/package.json index aec1bf2a..5f18eef2 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - "version": "0.1.117", + "version": "0.1.118", "private": true, "scripts": { "dev": "vite dev --host", diff --git a/src/lib/apis/auths/index.ts b/src/lib/apis/auths/index.ts index 548a9418..efeeff33 100644 --- a/src/lib/apis/auths/index.ts +++ b/src/lib/apis/auths/index.ts @@ -58,7 +58,12 @@ export const userSignIn = async (email: string, password: string) => { return res; }; -export const userSignUp = async (name: string, email: string, password: string) => { +export const userSignUp = async ( + name: string, + email: string, + password: string, + 
profile_image_url: string +) => { let error = null; const res = await fetch(`${WEBUI_API_BASE_URL}/auths/signup`, { @@ -69,7 +74,8 @@ export const userSignUp = async (name: string, email: string, password: string) body: JSON.stringify({ name: name, email: email, - password: password + password: password, + profile_image_url: profile_image_url }) }) .then(async (res) => { diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 668fe227..33c70e2b 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -345,3 +345,64 @@ export const resetVectorDB = async (token: string) => { return res; }; + +export const getEmbeddingModel = async (token: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/embedding/model`, { + method: 'GET', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + } + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + +type EmbeddingModelUpdateForm = { + embedding_model: string; +}; + +export const updateEmbeddingModel = async (token: string, payload: EmbeddingModelUpdateForm) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/embedding/model/update`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + ...payload + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + console.log(err); + error = err.detail; + return null; + }); + + if (error) { + throw error; + } + + return res; +}; diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index a23649d8..eff65a25 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -295,6 +295,13 @@ const dropZone = document.querySelector('body'); + const handleKeyDown = (event: KeyboardEvent) => { + if (event.key === 'Escape') { + console.log('Escape'); + dragged = false; + } + }; + const onDragOver = (e) => { e.preventDefault(); dragged = true; @@ -350,11 +357,15 @@ dragged = false; }; + window.addEventListener('keydown', handleKeyDown); + dropZone?.addEventListener('dragover', onDragOver); dropZone?.addEventListener('drop', onDrop); dropZone?.addEventListener('dragleave', onDragLeave); return () => { + window.removeEventListener('keydown', handleKeyDown); + dropZone?.removeEventListener('dragover', onDragOver); dropZone?.removeEventListener('drop', onDrop); dropZone?.removeEventListener('dragleave', onDragLeave); diff --git a/src/lib/components/chat/Messages.svelte b/src/lib/components/chat/Messages.svelte index 4cd97ca8..89277766 100644 --- a/src/lib/components/chat/Messages.svelte +++ b/src/lib/components/chat/Messages.svelte @@ -107,12 +107,8 @@ await sendPrompt(userPrompt, userMessageId, chatId); }; - const confirmEditResponseMessage = async (messageId, content) => { - history.messages[messageId].originalContent = history.messages[messageId].content; - history.messages[messageId].content = content; - + const updateChatMessages = async () => { await tick(); - await updateChatById(localStorage.token, chatId, { messages: messages, history: history @@ -121,15 +117,20 @@ await chats.set(await getChatList(localStorage.token)); }; - const rateMessage = async (messageId, rating) => { - 
history.messages[messageId].rating = rating; - await tick(); - await updateChatById(localStorage.token, chatId, { - messages: messages, - history: history - }); + const confirmEditResponseMessage = async (messageId, content) => { + history.messages[messageId].originalContent = history.messages[messageId].content; + history.messages[messageId].content = content; - await chats.set(await getChatList(localStorage.token)); + await updateChatMessages(); + }; + + const rateMessage = async (messageId, rating) => { + history.messages[messageId].annotation = { + ...history.messages[messageId].annotation, + rating: rating + }; + + await updateChatMessages(); }; const showPreviousMessage = async (message) => { @@ -338,6 +339,7 @@ siblings={history.messages[message.parentId]?.childrenIds ?? []} isLastMessage={messageIdx + 1 === messages.length} {readOnly} + {updateChatMessages} {confirmEditResponseMessage} {showPreviousMessage} {showNextMessage} diff --git a/src/lib/components/chat/Messages/RateComment.svelte b/src/lib/components/chat/Messages/RateComment.svelte new file mode 100644 index 00000000..2bdc3d04 --- /dev/null +++ b/src/lib/components/chat/Messages/RateComment.svelte @@ -0,0 +1,117 @@ + + +
+[The 117-line body of this new component was garbled in extraction. The surviving fragments show a "Tell us more:" label and a conditional `{#if reasons.length > 0}` block that renders one selectable element per entry via `{#each reasons as reason} ... {/each}`; the surrounding markup is not recoverable.]
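
Usage notes. The `createWorkflowDispatch` call added to `build-release.yml` can also be reproduced by hand with the GitHub CLI, which is useful when re-running the Docker build for an existing tag. This is a sketch, assuming an authenticated `gh`; the tag name is illustrative:

```bash
# Manually trigger the Docker build workflow for a release tag,
# mirroring the createWorkflowDispatch call in build-release.yml.
gh workflow run docker-build.yaml --ref v0.1.118
```

The new Dockerfile build args can likewise be exercised locally without CI. A minimal sketch of building and running the CUDA variant; the image name is arbitrary, and `--gpus all` assumes the Nvidia Container Toolkit mentioned in the README is installed:

```bash
# Build with the CUDA build args introduced in the Dockerfile.
docker build --build-arg USE_CUDA=true --build-arg USE_CUDA_VER=cu121 -t open-webui:cuda .

# Run with GPU access and the persistent data volume the README calls for.
docker run -d -p 3000:8080 --gpus all \
  -v open-webui:/app/backend/data \
  --name open-webui open-webui:cuda
```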