open-webui/Dockerfile

# syntax=docker/dockerfile:1
# Build-time switches; override with `--build-arg <NAME>=true`.
# They are declared before the first FROM, so a stage that wants to read them has to
# redeclare them with a bare `ARG <NAME>` (the backend stage does this) and then
# inherits the defaults given here.
#   USE_CUDA       - selects the CUDA PyTorch wheels instead of the CPU wheels
#   USE_MPS        - Apple-silicon (MPS) build variant
#   INCLUDE_OLLAMA - additionally installs Ollama into the image
ARG USE_CUDA=false
ARG USE_MPS=false
ARG INCLUDE_OLLAMA=false

######## WebUI frontend ########
# Build stage: installs the Node dependencies and builds the frontend.
# Nothing from this stage ends up in the final image except what later stages
# explicitly pull in with `COPY --from=build`.
FROM node:21-alpine3.19 AS build

WORKDIR /app

# Copy only the manifests first so the dependency layer stays cached until they change.
COPY package.json package-lock.json ./
RUN npm ci

# Bring in the remaining sources and run the frontend build
# (its output under /app/build is copied into the backend stage).
COPY . .
RUN npm run build

######## WebUI backend ########
FROM python:3.11-slim-bookworm AS base

# Redeclare the global build args: an ARG defined before the first FROM is not
# visible inside a stage until it is declared again (defaults are inherited).
ARG USE_CUDA
ARG USE_MPS
ARG INCLUDE_OLLAMA

## Runtime defaults: general settings, service URLs, keys and telemetry opt-outs ##
ENV \
    # -- general --
    # INCLUDE_OLLAMA_ENV carries the INCLUDE_OLLAMA build arg into the runtime environment
    ENV=prod \
    PORT=8080 \
    INCLUDE_OLLAMA_ENV=${INCLUDE_OLLAMA} \
    # -- service base URLs --
    OLLAMA_BASE_URL="/ollama" \
    OPENAI_API_BASE_URL="" \
    # -- API key / secret (empty by default) --
    OPENAI_API_KEY="" \
    WEBUI_SECRET_KEY="" \
    # -- telemetry opt-outs --
    SCARF_NO_ANALYTICS=true \
    DO_NOT_TRACK=true

#### Preloaded models #########################################################
## Whisper model settings ##
ENV WHISPER_MODEL="base" \
    WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"

## RAG embedding model settings ##
# Any sentence-transformers model can be used; see
#   https://huggingface.co/models?library=sentence-transformers
#   https://huggingface.co/spaces/mteb/leaderboard (leaderboard)
# For better performance and multilingual support use "intfloat/multilingual-e5-large"
# (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB).
# IMPORTANT: after switching away from (or back to) the default model (all-MiniLM-L6-v2),
# RAG Chat no longer works with documents already loaded in the WebUI until they are
# re-embedded.
ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \
    RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \
    SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"

## Device settings ##
# Device type for the models: "cpu" (default), "cuda" (NVIDIA GPU) or "mps" (Apple silicon).
# Choosing the right one can lead to better performance.
# CUDA requires the nvidia-container-toolkit on the host:
#   https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
# The value can be set to "cuda" by hand, but building the image with
# `--build-arg USE_CUDA=true` is the recommended way.
ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \
    DEVICE_COMPUTE_TYPE="int8"
#### Preloaded models ##########################################################

WORKDIR /app/backend

# Install the Python dependencies. Only requirements.txt is copied at this point so the
# layer below is rebuilt only when the dependency list changes.
COPY ./backend/requirements.txt ./requirements.txt

# Steps performed by the RUN below:
#   1. Choose the PyTorch wheel index: the cu117 index when USE_CUDA=true, the CPU index
#      otherwise (USE_MPS=true builds used the CPU index before as well).
#   2. Install torch/torchvision/torchaudio from that index, then requirements.txt.
#   3. For non-CUDA builds, instantiate the whisper and embedding models once so their
#      weights are downloaded during the build. CUDA builds skip this step, as before.
# The preload always runs on device "cpu". Previously a USE_MPS=true build asked for
# device "mps" here; NOTE(review): the build container runs Linux with the CPU wheels,
# where an MPS device is presumably unavailable, so that build variant would fail.
# The device used for the preload does not select the runtime device.
RUN if [ "$USE_CUDA" = "true" ]; then \
        TORCH_INDEX_URL="https://download.pytorch.org/whl/cu117"; \
    else \
        TORCH_INDEX_URL="https://download.pytorch.org/whl/cpu"; \
    fi && \
    pip3 install torch torchvision torchaudio --index-url "$TORCH_INDEX_URL" --no-cache-dir && \
    pip3 install -r requirements.txt --no-cache-dir && \
    if [ "$USE_CUDA" != "true" ]; then \
        python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \
        python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \
    fi


# System packages, installed in a single layer together with the apt list cleanup:
#   pandoc, netcat-openbsd    - pandoc and netcat
#   ffmpeg, libsm6, libxext6  - needed for RAG OCR
# When INCLUDE_OLLAMA=true, curl is installed as well and used to fetch the Ollama
# install script. The script is saved to a file and then executed instead of being
# piped into `sh`: with the default /bin/sh there is no pipefail, so a failed download
# in `curl | sh` would go unnoticed and the build would succeed without Ollama.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ffmpeg \
        libsm6 \
        libxext6 \
        netcat-openbsd \
        pandoc && \
    if [ "$INCLUDE_OLLAMA" = "true" ]; then \
        apt-get install -y --no-install-recommends curl && \
        curl -fsSL https://ollama.com/install.sh -o /tmp/ollama-install.sh && \
        sh /tmp/ollama-install.sh && \
        rm -f /tmp/ollama-install.sh; \
    fi && \
    rm -rf /var/lib/apt/lists/*


# Frontend artifacts produced by the `build` stage: the built site plus the
# changelog and package manifest.
COPY --from=build /app/build /app/build
COPY --from=build /app/CHANGELOG.md /app/package.json /app/

# Backend sources go into the current WORKDIR. They are copied last so that source
# edits do not invalidate the dependency layers above.
COPY backend/ ./

# Port the container is documented to listen on.
EXPOSE 8080

# Start the application through the start.sh script copied in with the backend sources.
CMD ["bash", "start.sh"]
chat feature added 2023-10-09 00:38:42 +02:00			`# syntax=docker/dockerfile:1`
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`# Initialize device type args`
			`ARG USE_CUDA=false`
			`ARG USE_MPS=false`
All in one Dockerfile for including Ollama 2024-03-22 09:31:35 +01:00			`ARG INCLUDE_OLLAMA=false`
chat feature added 2023-10-09 00:38:42 +02:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`######## WebUI frontend ########`
Update Dockerfile 2024-03-16 12:43:48 +01:00			`FROM node:21-alpine3.19 as build`
feat: enable buildtime API_ENDPOINT env var 2023-10-22 01:14:12 +02:00
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00			`WORKDIR /app`

changed from bullseye to bookworm + removed unused steps 2024-03-14 11:33:54 +01:00			`#RUN apt-get update \`
			`# && apt-get install -y --no-install-recommends wget \`
			`# # cleanup`
			`# && rm -rf /var/lib/apt/lists/*`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00
fix: docker 2024-01-08 05:50:09 +01:00			`# wget embedding model weight from alpine (does not exist from slim-buster)`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`#RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - \| \`
			`# tar -xzf - -C /app`
fix: docker 2024-01-08 05:50:09 +01:00
Reduce container image size Signed-off-by: Xiaodong Ye <yeahdongcn@gmail.com> 2024-01-25 11:08:35 +01:00			`COPY package.json package-lock.json ./`
fix: revert to node for container image 2024-01-05 06:15:13 +01:00			`RUN npm ci`
refactor: :recycle: bun-based dockerfile 2024-01-05 05:32:51 +01:00
fix: revert to node for container image 2024-01-05 06:15:13 +01:00			`COPY . .`
			`RUN npm run build`
chat feature added 2023-10-09 00:38:42 +02:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`######## WebUI backend ########`
docker: slim 2024-01-07 17:28:35 +01:00			`FROM python:3.11-slim-bookworm as base`
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`# Use args`
			`ARG USE_CUDA`
			`ARG USE_MPS`
All in one Dockerfile for including Ollama 2024-03-22 09:31:35 +01:00			`ARG INCLUDE_OLLAMA`
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`## Basis ##`
			`ENV ENV=prod \`
All in one Dockerfile for including Ollama 2024-03-22 09:31:35 +01:00			`PORT=8080 \`
			`INCLUDE_OLLAMA_ENV=${INCLUDE_OLLAMA}`
chore: dockerfile update 2024-01-05 03:55:15 +01:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`## Basis URL Config ##`
			`ENV OLLAMA_BASE_URL="/ollama" \`
			`OPENAI_API_BASE_URL=""`
chore: dockerfile update 2024-01-05 03:55:15 +01:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`## API Key and Security Config ##`
			`ENV OPENAI_API_KEY="" \`
			`WEBUI_SECRET_KEY="" \`
			`SCARF_NO_ANALYTICS=true \`
			`DO_NOT_TRACK=true`
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00
cuda support 2024-03-18 17:08:34 +01:00			`#### Preloaded models #########################################################`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`## whisper TTS Settings ##`
			`ENV WHISPER_MODEL="base" \`
			`WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"`
feat: disable tracking from unstructured 2024-02-10 03:11:01 +01:00
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`## RAG Embedding Model Settings ##`
choose embedding model when using docker 2024-02-17 19:38:29 +01:00			`# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers`
			`# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard`
Fix typo in Dockerfile comment for model recommendation Okay, this was driving my OCD crazy. Corrected a spelling error in the Dockerfile's comment section to enhance documentation clarity. The typo 'persormance' was updated to 'performance,' ensuring accurate guidance on using multilingual sentence transformer models for better performance and language support. 2024-03-20 23:28:57 +01:00			`# for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)`
choose embedding model when using docker 2024-02-17 19:38:29 +01:00			`# IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" \`
			`RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \`
cuda support 2024-03-18 17:08:34 +01:00			`SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" \`
			`# device type for whisper tts and embbeding models - "cpu" (default) or "mps" (apple silicon) - choosing this right can lead to better performance`
			`# Important:`
			`# If you want to use CUDA you need to install the nvidia-container-toolkit (https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)`
			`# you can set this to "cuda" but its recomended to use --build-arg CUDA_ENABLED=true flag when building the image`
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" \`
			`DEVICE_COMPUTE_TYPE="int8"`
cuda support 2024-03-18 17:08:34 +01:00			`# device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`#### Preloaded models ##########################################################`
choose embedding model when using docker 2024-02-17 19:38:29 +01:00
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00			`WORKDIR /app/backend`
chore: optimize dockerfile order for caching 2024-01-23 13:48:27 +01:00			`# install python dependencies`
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00			`COPY ./backend/requirements.txt ./requirements.txt`
fix: dockerfile 2024-01-08 06:22:37 +01:00
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`RUN if [ "$USE_CUDA" = "true" ]; then \`
			`export DEVICE_TYPE="cuda" && \`
			`pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --no-cache-dir && \`
			`pip3 install -r requirements.txt --no-cache-dir; \`
			`elif [ "$USE_MPS" = "true" ]; then \`
			`export DEVICE_TYPE="mps" && \`
			`pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \`
			`pip3 install -r requirements.txt --no-cache-dir && \`
			`python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \`
			`python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['DEVICE_TYPE'])"; \`
cuda support 2024-03-18 17:08:34 +01:00			`else \`
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`export DEVICE_TYPE="cpu" && \`
cuda support 2024-03-18 17:08:34 +01:00			`pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \`
docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`pip3 install -r requirements.txt --no-cache-dir && \`
			`python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \`
			`python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['DEVICE_TYPE'])"; \`
cuda support 2024-03-18 17:08:34 +01:00			`fi`

All in one Dockerfile for including Ollama 2024-03-22 09:31:35 +01:00
			`RUN if [ "$INCLUDE_OLLAMA" = "true" ]; then \`
			`apt-get update && \`
			`# Install pandoc and netcat`
			`apt-get install -y --no-install-recommends pandoc netcat-openbsd && \`
			`# for RAG OCR`
			`apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \`
			`# install helper tools`
			`apt-get install -y --no-install-recommends curl && \`
			`# install ollama`
			`curl -fsSL https://ollama.com/install.sh \| sh && \`
			`# cleanup`
			`rm -rf /var/lib/apt/lists/*; \`
			`else \`
			`apt-get update && \`
			`# Install pandoc and netcat`
			`apt-get install -y --no-install-recommends pandoc netcat-openbsd && \`
			`# for RAG OCR`
			`apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \`
			`# cleanup`
			`rm -rf /var/lib/apt/lists/*; \`
			`fi`
Update Dockerfile 2024-01-23 08:11:50 +01:00
cuda support 2024-03-18 17:08:34 +01:00
no internet connection for whisper if you use docker 2024-02-13 15:11:53 +01:00
chore: optimize dockerfile order for caching 2024-01-23 13:48:27 +01:00			`# copy embedding weight from build`
Dockerfile optimisation 2024-03-14 11:18:27 +01:00			`# RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2`
			`# COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx`
chore: optimize dockerfile order for caching 2024-01-23 13:48:27 +01:00
			`# copy built frontend files`
			`COPY --from=build /app/build /app/build`
fix: docker import file issue 2024-02-23 09:54:22 +01:00			`COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md`
			`COPY --from=build /app/package.json /app/package.json`
chore: optimize dockerfile order for caching 2024-01-23 13:48:27 +01:00
			`# copy backend files`
feat: backend reverse proxy 2023-11-15 01:28:51 +01:00			`COPY ./backend .`

Exposed port 8080 2024-03-16 20:11:09 +01:00			`EXPOSE 8080`

docker improvements & changed universal device type env for different models used 2024-03-20 08:44:09 +01:00			`CMD [ "bash", "start.sh"]`