forked from open-webui/open-webui
Merge pull request #1165 from jannikstdl/dockerfile-optimisation
refac: Dockerfile
This commit is contained in:
commit
e844e7f708
7 changed files with 210 additions and 88 deletions
36
.github/workflows/docker-build.yaml
vendored
36
.github/workflows/docker-build.yaml
vendored
|
@ -1,5 +1,4 @@
|
||||||
#
|
name: Create and publish Docker images with specific build args
|
||||||
name: Create and publish a Docker image
|
|
||||||
|
|
||||||
# Configures this workflow to run every time a change is pushed to the branch called `release`.
|
# Configures this workflow to run every time a change is pushed to the branch called `release`.
|
||||||
on:
|
on:
|
||||||
|
@ -24,7 +23,7 @@ jobs:
|
||||||
permissions:
|
permissions:
|
||||||
contents: read
|
contents: read
|
||||||
packages: write
|
packages: write
|
||||||
#
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
@ -42,8 +41,8 @@ jobs:
|
||||||
username: ${{ github.actor }}
|
username: ${{ github.actor }}
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Extract metadata for Docker images
|
- name: Extract metadata for Docker images (default latest tag)
|
||||||
id: meta
|
id: meta-latest
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||||
|
@ -54,14 +53,31 @@ jobs:
|
||||||
type=sha,prefix=git-
|
type=sha,prefix=git-
|
||||||
type=semver,pattern={{version}}
|
type=semver,pattern={{version}}
|
||||||
type=semver,pattern={{major}}.{{minor}}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
flavor: |
|
latest=true
|
||||||
latest=${{ github.ref == 'refs/heads/main' }}
|
|
||||||
|
|
||||||
- name: Build and push Docker image
|
- name: Build and push Docker image (latest)
|
||||||
uses: docker/build-push-action@v5
|
uses: docker/build-push-action@v5
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
push: true
|
push: true
|
||||||
platforms: linux/amd64,linux/arm64
|
platforms: linux/amd64,linux/arm64
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: ${{ steps.meta-latest.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta-latest.outputs.labels }}
|
||||||
|
|
||||||
|
- name: Build and push Docker image with CUDA
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda
|
||||||
|
build-args: USE_CUDA=true
|
||||||
|
|
||||||
|
- name: Build and push Docker image with Ollama
|
||||||
|
uses: docker/build-push-action@v5
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ollama
|
||||||
|
build-args: USE_OLLAMA=true
|
||||||
|
|
135
Dockerfile
135
Dockerfile
|
@ -1,82 +1,111 @@
|
||||||
# syntax=docker/dockerfile:1
|
# syntax=docker/dockerfile:1
|
||||||
|
# Initialize device type args
|
||||||
|
# use build args in the docker build command with --build-arg="BUILDARG=true"
|
||||||
|
ARG USE_CUDA=false
|
||||||
|
ARG USE_OLLAMA=false
|
||||||
|
# Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)
|
||||||
|
ARG USE_CUDA_VER=cu121
|
||||||
|
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
|
||||||
|
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
|
||||||
|
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
|
||||||
|
# IMPORTANT: If you switch away from the default model (all-MiniLM-L6-v2) or back to it, you won't be able to use RAG Chat with documents previously loaded in the WebUI! You need to re-embed them.
|
||||||
|
ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2
|
||||||
|
|
||||||
FROM node:alpine as build
|
######## WebUI frontend ########
|
||||||
|
FROM node:21-alpine3.19 as build
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# wget embedding model weight from alpine (does not exist from slim-buster)
|
|
||||||
RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \
|
|
||||||
tar -xzf - -C /app
|
|
||||||
|
|
||||||
COPY package.json package-lock.json ./
|
COPY package.json package-lock.json ./
|
||||||
RUN npm ci
|
RUN npm ci
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
RUN npm run build
|
RUN npm run build
|
||||||
|
|
||||||
|
######## WebUI backend ########
|
||||||
FROM python:3.11-slim-bookworm as base
|
FROM python:3.11-slim-bookworm as base
|
||||||
|
|
||||||
ENV ENV=prod
|
# Use args
|
||||||
ENV PORT ""
|
ARG USE_CUDA
|
||||||
|
ARG USE_OLLAMA
|
||||||
|
ARG USE_CUDA_VER
|
||||||
|
ARG USE_EMBEDDING_MODEL
|
||||||
|
|
||||||
ENV OLLAMA_BASE_URL "/ollama"
|
## Basis ##
|
||||||
|
ENV ENV=prod \
|
||||||
|
PORT=8080 \
|
||||||
|
# pass build args to the build
|
||||||
|
USE_OLLAMA_DOCKER=${USE_OLLAMA} \
|
||||||
|
USE_CUDA_DOCKER=${USE_CUDA} \
|
||||||
|
USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \
|
||||||
|
USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL}
|
||||||
|
|
||||||
ENV OPENAI_API_BASE_URL ""
|
## Basis URL Config ##
|
||||||
ENV OPENAI_API_KEY ""
|
ENV OLLAMA_BASE_URL="/ollama" \
|
||||||
|
OPENAI_API_BASE_URL=""
|
||||||
|
|
||||||
ENV WEBUI_SECRET_KEY ""
|
## API Key and Security Config ##
|
||||||
ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER ""
|
ENV OPENAI_API_KEY="" \
|
||||||
|
WEBUI_SECRET_KEY="" \
|
||||||
|
SCARF_NO_ANALYTICS=true \
|
||||||
|
DO_NOT_TRACK=true
|
||||||
|
|
||||||
ENV SCARF_NO_ANALYTICS true
|
#### Other models #########################################################
|
||||||
ENV DO_NOT_TRACK true
|
## whisper STT (speech-to-text) model settings ##
|
||||||
|
ENV WHISPER_MODEL="base" \
|
||||||
|
WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
|
||||||
|
|
||||||
# Use locally bundled version of the LiteLLM cost map json
|
## RAG Embedding model settings ##
|
||||||
# to avoid repetitive startup connections
|
ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
|
||||||
ENV LITELLM_LOCAL_MODEL_COST_MAP="True"
|
RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
|
||||||
|
SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
|
||||||
######## Preloaded models ########
|
#### Other models ##########################################################
|
||||||
# whisper TTS Settings
|
|
||||||
ENV WHISPER_MODEL="base"
|
|
||||||
ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
|
|
||||||
|
|
||||||
# RAG Embedding Model Settings
|
|
||||||
# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
|
|
||||||
# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
|
|
||||||
# for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
|
|
||||||
# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
|
|
||||||
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
|
|
||||||
# device type for whisper and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
|
|
||||||
ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu"
|
|
||||||
ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
|
|
||||||
ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
|
|
||||||
|
|
||||||
######## Preloaded models ########
|
|
||||||
|
|
||||||
WORKDIR /app/backend
|
WORKDIR /app/backend
|
||||||
|
|
||||||
# install python dependencies
|
# install python dependencies
|
||||||
COPY ./backend/requirements.txt ./requirements.txt
|
COPY ./backend/requirements.txt ./requirements.txt
|
||||||
|
|
||||||
RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
|
RUN if [ "$USE_CUDA" = "true" ]; then \
|
||||||
|
# If you use CUDA, the whisper and embedding models will be downloaded on first use
|
||||||
|
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
|
||||||
|
pip3 install -r requirements.txt --no-cache-dir && \
|
||||||
|
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
|
||||||
|
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
|
||||||
|
else \
|
||||||
|
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
|
||||||
|
pip3 install -r requirements.txt --no-cache-dir && \
|
||||||
|
python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
|
||||||
|
python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
|
||||||
|
fi
|
||||||
|
|
||||||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
|
|
||||||
RUN pip3 install -r requirements.txt --no-cache-dir
|
|
||||||
|
|
||||||
# Install pandoc and netcat
|
RUN if [ "$USE_OLLAMA" = "true" ]; then \
|
||||||
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
|
apt-get update && \
|
||||||
RUN apt-get update \
|
# Install pandoc and netcat
|
||||||
&& apt-get install -y pandoc netcat-openbsd \
|
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
# for RAG OCR
|
||||||
|
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
|
||||||
|
# install helper tools
|
||||||
|
apt-get install -y --no-install-recommends curl && \
|
||||||
|
# install ollama
|
||||||
|
curl -fsSL https://ollama.com/install.sh | sh && \
|
||||||
|
# cleanup
|
||||||
|
rm -rf /var/lib/apt/lists/*; \
|
||||||
|
else \
|
||||||
|
apt-get update && \
|
||||||
|
# Install pandoc and netcat
|
||||||
|
apt-get install -y --no-install-recommends pandoc netcat-openbsd && \
|
||||||
|
# for RAG OCR
|
||||||
|
apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
|
||||||
|
# cleanup
|
||||||
|
rm -rf /var/lib/apt/lists/*; \
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
# preload embedding model
|
|
||||||
RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])"
|
|
||||||
# preload tts model
|
|
||||||
RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
|
|
||||||
|
|
||||||
# copy embedding weight from build
|
# copy embedding weight from build
|
||||||
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
|
# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
|
||||||
COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
|
# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx
|
||||||
|
|
||||||
# copy built frontend files
|
# copy built frontend files
|
||||||
COPY --from=build /app/build /app/build
|
COPY --from=build /app/build /app/build
|
||||||
|
@ -86,4 +115,6 @@ COPY --from=build /app/package.json /app/package.json
|
||||||
# copy backend files
|
# copy backend files
|
||||||
COPY ./backend .
|
COPY ./backend .
|
||||||
|
|
||||||
CMD [ "bash", "start.sh"]
|
EXPOSE 8080
|
||||||
|
|
||||||
|
CMD [ "bash", "start.sh"]
|
59
README.md
59
README.md
|
@ -113,6 +113,65 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
|
||||||
|
|
||||||
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
|
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
|
||||||
|
|
||||||
|
- **If you want to customize your build with additional args**, use these commands:
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> If you only want to use Open WebUI with Ollama included or with CUDA acceleration, it's recommended to use our official images with the tags :cuda or :ollama
|
||||||
|
> If you want a combination of both or more customisation options like a different embedding model and/or CUDA version you need to build the image yourself following the instructions below.
|
||||||
|
|
||||||
|
**For the build:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker build -t open-webui .
|
||||||
|
```
|
||||||
|
|
||||||
|
Optional build ARGS (append them to the docker build command above if needed):
|
||||||
|
|
||||||
|
e.g.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large"
|
||||||
|
```
|
||||||
|
|
||||||
|
To use a custom embedding model such as "intfloat/multilingual-e5-large" (default is all-MiniLM-L6-v2). This only works with [sentence transformer models](https://huggingface.co/models?library=sentence-transformers). See the current [leaderboard](https://huggingface.co/spaces/mteb/leaderboard) of embedding models.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--build-arg="USE_OLLAMA=true"
|
||||||
|
```
|
||||||
|
|
||||||
|
For including ollama in the image.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--build-arg="USE_CUDA=true"
|
||||||
|
```
|
||||||
|
|
||||||
|
To use CUDA acceleration for the embedding and whisper models.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> You need to install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your machine to be able to set CUDA as the Docker engine. Only works with Linux - use WSL for Windows!
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--build-arg="USE_CUDA_VER=cu117"
|
||||||
|
```
|
||||||
|
|
||||||
|
For CUDA 11 (default is CUDA 12)
|
||||||
|
|
||||||
|
**To run the image:**
|
||||||
|
|
||||||
|
- **If you DID NOT use the USE_CUDA=true build ARG**, use this command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
|
||||||
|
```
|
||||||
|
|
||||||
|
- **If you DID use the USE_CUDA=true build ARG**, use this command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
|
||||||
|
```
|
||||||
|
|
||||||
|
- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
|
||||||
|
|
||||||
#### Open WebUI: Server Connection Error
|
#### Open WebUI: Server Connection Error
|
||||||
|
|
||||||
If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
|
If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`.
|
||||||
|
|
|
@ -28,6 +28,7 @@ from config import (
|
||||||
UPLOAD_DIR,
|
UPLOAD_DIR,
|
||||||
WHISPER_MODEL,
|
WHISPER_MODEL,
|
||||||
WHISPER_MODEL_DIR,
|
WHISPER_MODEL_DIR,
|
||||||
|
DEVICE_TYPE,
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -42,6 +43,10 @@ app.add_middleware(
|
||||||
allow_headers=["*"],
|
allow_headers=["*"],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# setting device type for whisper model
|
||||||
|
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
|
||||||
|
log.info(f"whisper_device_type: {whisper_device_type}")
|
||||||
|
|
||||||
|
|
||||||
@app.post("/transcribe")
|
@app.post("/transcribe")
|
||||||
def transcribe(
|
def transcribe(
|
||||||
|
@ -66,7 +71,7 @@ def transcribe(
|
||||||
|
|
||||||
model = WhisperModel(
|
model = WhisperModel(
|
||||||
WHISPER_MODEL,
|
WHISPER_MODEL,
|
||||||
device="auto",
|
device=whisper_device_type,
|
||||||
compute_type="int8",
|
compute_type="int8",
|
||||||
download_root=WHISPER_MODEL_DIR,
|
download_root=WHISPER_MODEL_DIR,
|
||||||
)
|
)
|
||||||
|
|
|
@ -59,7 +59,7 @@ from config import (
|
||||||
UPLOAD_DIR,
|
UPLOAD_DIR,
|
||||||
DOCS_DIR,
|
DOCS_DIR,
|
||||||
RAG_EMBEDDING_MODEL,
|
RAG_EMBEDDING_MODEL,
|
||||||
RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
DEVICE_TYPE,
|
||||||
CHROMA_CLIENT,
|
CHROMA_CLIENT,
|
||||||
CHUNK_SIZE,
|
CHUNK_SIZE,
|
||||||
CHUNK_OVERLAP,
|
CHUNK_OVERLAP,
|
||||||
|
@ -71,15 +71,6 @@ from constants import ERROR_MESSAGES
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||||
|
|
||||||
#
|
|
||||||
# if RAG_EMBEDDING_MODEL:
|
|
||||||
# sentence_transformer_ef = SentenceTransformer(
|
|
||||||
# model_name_or_path=RAG_EMBEDDING_MODEL,
|
|
||||||
# cache_folder=RAG_EMBEDDING_MODEL_DIR,
|
|
||||||
# device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
|
||||||
# )
|
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
app.state.PDF_EXTRACT_IMAGES = False
|
app.state.PDF_EXTRACT_IMAGES = False
|
||||||
|
@ -92,7 +83,7 @@ app.state.TOP_K = 4
|
||||||
app.state.sentence_transformer_ef = (
|
app.state.sentence_transformer_ef = (
|
||||||
embedding_functions.SentenceTransformerEmbeddingFunction(
|
embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||||
model_name=app.state.RAG_EMBEDDING_MODEL,
|
model_name=app.state.RAG_EMBEDDING_MODEL,
|
||||||
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
device=DEVICE_TYPE,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -147,10 +138,9 @@ async def update_embedding_model(
|
||||||
app.state.sentence_transformer_ef = (
|
app.state.sentence_transformer_ef = (
|
||||||
embedding_functions.SentenceTransformerEmbeddingFunction(
|
embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||||
model_name=app.state.RAG_EMBEDDING_MODEL,
|
model_name=app.state.RAG_EMBEDDING_MODEL,
|
||||||
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
device=DEVICE_TYPE,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": True,
|
"status": True,
|
||||||
"embedding_model": app.state.RAG_EMBEDDING_MODEL,
|
"embedding_model": app.state.RAG_EMBEDDING_MODEL,
|
||||||
|
|
|
@ -257,6 +257,7 @@ OLLAMA_API_BASE_URL = os.environ.get(
|
||||||
|
|
||||||
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
|
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
|
||||||
K8S_FLAG = os.environ.get("K8S_FLAG", "")
|
K8S_FLAG = os.environ.get("K8S_FLAG", "")
|
||||||
|
USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
|
||||||
|
|
||||||
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
|
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
|
||||||
OLLAMA_BASE_URL = (
|
OLLAMA_BASE_URL = (
|
||||||
|
@ -266,9 +267,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
|
||||||
)
|
)
|
||||||
|
|
||||||
if ENV == "prod":
|
if ENV == "prod":
|
||||||
if OLLAMA_BASE_URL == "/ollama":
|
if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG:
|
||||||
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
|
if USE_OLLAMA_DOCKER.lower() == "true":
|
||||||
|
# if you use all-in-one docker container (Open WebUI + Ollama)
|
||||||
|
# with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434
|
||||||
|
OLLAMA_BASE_URL = "http://localhost:11434"
|
||||||
|
else:
|
||||||
|
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
|
||||||
elif K8S_FLAG:
|
elif K8S_FLAG:
|
||||||
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
|
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
|
||||||
|
|
||||||
|
@ -391,10 +396,16 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
|
||||||
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
|
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
|
||||||
# this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
|
# this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
|
||||||
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||||
|
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
|
||||||
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
|
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance
|
||||||
RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get(
|
USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
|
||||||
"RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu"
|
|
||||||
)
|
if USE_CUDA.lower() == "true":
|
||||||
|
DEVICE_TYPE = "cuda"
|
||||||
|
else:
|
||||||
|
DEVICE_TYPE = "cpu"
|
||||||
|
|
||||||
|
|
||||||
CHROMA_CLIENT = chromadb.PersistentClient(
|
CHROMA_CLIENT = chromadb.PersistentClient(
|
||||||
path=CHROMA_DATA_PATH,
|
path=CHROMA_DATA_PATH,
|
||||||
settings=Settings(allow_reset=True, anonymized_telemetry=False),
|
settings=Settings(allow_reset=True, anonymized_telemetry=False),
|
||||||
|
|
|
@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key
|
||||||
|
|
||||||
PORT="${PORT:-8080}"
|
PORT="${PORT:-8080}"
|
||||||
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
|
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
|
||||||
echo No WEBUI_SECRET_KEY provided
|
echo "No WEBUI_SECRET_KEY provided"
|
||||||
|
|
||||||
if ! [ -e "$KEY_FILE" ]; then
|
if ! [ -e "$KEY_FILE" ]; then
|
||||||
echo Generating WEBUI_SECRET_KEY
|
echo "Generating WEBUI_SECRET_KEY"
|
||||||
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
|
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
|
||||||
echo $(head -c 12 /dev/random | base64) > $KEY_FILE
|
echo $(head -c 12 /dev/random | base64) > "$KEY_FILE"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo Loading WEBUI_SECRET_KEY from $KEY_FILE
|
echo "Loading WEBUI_SECRET_KEY from $KEY_FILE"
|
||||||
WEBUI_SECRET_KEY=`cat $KEY_FILE`
|
WEBUI_SECRET_KEY=$(cat "$KEY_FILE")
|
||||||
fi
|
fi
|
||||||
|
|
||||||
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
|
if [ "$USE_OLLAMA_DOCKER" = "true" ]; then
|
||||||
|
echo "USE_OLLAMA is set to true, starting ollama serve."
|
||||||
|
ollama serve &
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$USE_CUDA_DOCKER" = "true" ]; then
|
||||||
|
echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
|
||||||
|
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
|
||||||
|
fi
|
||||||
|
|
||||||
|
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
|
||||||
|
|
Loading…
Reference in a new issue