forked from open-webui/open-webui
		
	Merge pull request #1165 from jannikstdl/dockerfile-optimisation
refac: Dockerfile
This commit is contained in:
		
						commit
						e844e7f708
					
				
					 7 changed files with 210 additions and 88 deletions
				
			
		
							
								
								
									
										36
									
								
								.github/workflows/docker-build.yaml
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										36
									
								
								.github/workflows/docker-build.yaml
									
										
									
									
										vendored
									
									
								
							|  | @ -1,5 +1,4 @@ | ||||||
| # | name: Create and publish Docker images with specific build args | ||||||
| name: Create and publish a Docker image |  | ||||||
| 
 | 
 | ||||||
| # Configures this workflow to run every time a change is pushed to the branch called `release`. | # Configures this workflow to run every time a change is pushed to the branch called `release`. | ||||||
| on: | on: | ||||||
|  | @ -24,7 +23,7 @@ jobs: | ||||||
|     permissions: |     permissions: | ||||||
|       contents: read |       contents: read | ||||||
|       packages: write |       packages: write | ||||||
|       # | 
 | ||||||
|     steps: |     steps: | ||||||
|       - name: Checkout repository |       - name: Checkout repository | ||||||
|         uses: actions/checkout@v4 |         uses: actions/checkout@v4 | ||||||
|  | @ -42,8 +41,8 @@ jobs: | ||||||
|           username: ${{ github.actor }} |           username: ${{ github.actor }} | ||||||
|           password: ${{ secrets.GITHUB_TOKEN }} |           password: ${{ secrets.GITHUB_TOKEN }} | ||||||
| 
 | 
 | ||||||
|       - name: Extract metadata for Docker images |       - name: Extract metadata for Docker images (default latest tag) | ||||||
|         id: meta |         id: meta-latest | ||||||
|         uses: docker/metadata-action@v5 |         uses: docker/metadata-action@v5 | ||||||
|         with: |         with: | ||||||
|           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} |           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} | ||||||
|  | @ -54,14 +53,31 @@ jobs: | ||||||
|             type=sha,prefix=git- |             type=sha,prefix=git- | ||||||
|             type=semver,pattern={{version}} |             type=semver,pattern={{version}} | ||||||
|             type=semver,pattern={{major}}.{{minor}} |             type=semver,pattern={{major}}.{{minor}} | ||||||
|           flavor: | |             latest=true | ||||||
|             latest=${{ github.ref == 'refs/heads/main' }} |  | ||||||
| 
 | 
 | ||||||
|       - name: Build and push Docker image |       - name: Build and push Docker image (latest) | ||||||
|         uses: docker/build-push-action@v5 |         uses: docker/build-push-action@v5 | ||||||
|         with: |         with: | ||||||
|           context: . |           context: . | ||||||
|           push: true |           push: true | ||||||
|           platforms: linux/amd64,linux/arm64 |           platforms: linux/amd64,linux/arm64 | ||||||
|           tags: ${{ steps.meta.outputs.tags }} |           tags: ${{ steps.meta-latest.outputs.tags }} | ||||||
|           labels: ${{ steps.meta.outputs.labels }} |           labels: ${{ steps.meta-latest.outputs.labels }} | ||||||
|  | 
 | ||||||
|  |       - name: Build and push Docker image with CUDA | ||||||
|  |         uses: docker/build-push-action@v5 | ||||||
|  |         with: | ||||||
|  |           context: . | ||||||
|  |           push: true | ||||||
|  |           platforms: linux/amd64,linux/arm64 | ||||||
|  |           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda | ||||||
|  |           build-args: USE_CUDA=true | ||||||
|  | 
 | ||||||
|  |       - name: Build and push Docker image with Ollama | ||||||
|  |         uses: docker/build-push-action@v5 | ||||||
|  |         with: | ||||||
|  |           context: . | ||||||
|  |           push: true | ||||||
|  |           platforms: linux/amd64,linux/arm64 | ||||||
|  |           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:ollama | ||||||
|  |           build-args: USE_OLLAMA=true | ||||||
|  |  | ||||||
							
								
								
									
										135
									
								
								Dockerfile
									
										
									
									
									
								
							
							
						
						
									
										135
									
								
								Dockerfile
									
										
									
									
									
								
							|  | @ -1,82 +1,111 @@ | ||||||
| # syntax=docker/dockerfile:1 | # syntax=docker/dockerfile:1 | ||||||
|  | # Initialize device type args | ||||||
|  | # use build args in the docker build command with --build-arg="BUILDARG=true" | ||||||
|  | ARG USE_CUDA=false | ||||||
|  | ARG USE_OLLAMA=false | ||||||
|  | # Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default) | ||||||
|  | ARG USE_CUDA_VER=cu121 | ||||||
|  | # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers | ||||||
|  | # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard  | ||||||
|  | # for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB) | ||||||
|  | # IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them. | ||||||
|  | ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2 | ||||||
| 
 | 
 | ||||||
| FROM node:alpine as build | ######## WebUI frontend ######## | ||||||
|  | FROM node:21-alpine3.19 as build | ||||||
| 
 | 
 | ||||||
| WORKDIR /app | WORKDIR /app | ||||||
| 
 | 
 | ||||||
| # wget embedding model weight from alpine (does not exist from slim-buster) |  | ||||||
| RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \ |  | ||||||
|     tar -xzf - -C /app |  | ||||||
| 
 |  | ||||||
| COPY package.json package-lock.json ./ | COPY package.json package-lock.json ./ | ||||||
| RUN npm ci | RUN npm ci | ||||||
| 
 | 
 | ||||||
| COPY . . | COPY . . | ||||||
| RUN npm run build | RUN npm run build | ||||||
| 
 | 
 | ||||||
| 
 | ######## WebUI backend ######## | ||||||
| FROM python:3.11-slim-bookworm as base | FROM python:3.11-slim-bookworm as base | ||||||
| 
 | 
 | ||||||
| ENV ENV=prod | # Use args | ||||||
| ENV PORT "" | ARG USE_CUDA | ||||||
|  | ARG USE_OLLAMA | ||||||
|  | ARG USE_CUDA_VER | ||||||
|  | ARG USE_EMBEDDING_MODEL | ||||||
| 
 | 
 | ||||||
| ENV OLLAMA_BASE_URL "/ollama" | ## Basis ## | ||||||
|  | ENV ENV=prod \ | ||||||
|  |     PORT=8080 \ | ||||||
|  |     # pass build args to the build | ||||||
|  |     USE_OLLAMA_DOCKER=${USE_OLLAMA} \ | ||||||
|  |     USE_CUDA_DOCKER=${USE_CUDA} \ | ||||||
|  |     USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \ | ||||||
|  |     USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} | ||||||
| 
 | 
 | ||||||
| ENV OPENAI_API_BASE_URL "" | ## Basis URL Config ## | ||||||
| ENV OPENAI_API_KEY "" | ENV OLLAMA_BASE_URL="/ollama" \ | ||||||
|  |     OPENAI_API_BASE_URL="" | ||||||
| 
 | 
 | ||||||
| ENV WEBUI_SECRET_KEY "" | ## API Key and Security Config ## | ||||||
| ENV WEBUI_AUTH_TRUSTED_EMAIL_HEADER "" | ENV OPENAI_API_KEY="" \ | ||||||
|  |     WEBUI_SECRET_KEY="" \ | ||||||
|  |     SCARF_NO_ANALYTICS=true \ | ||||||
|  |     DO_NOT_TRACK=true | ||||||
| 
 | 
 | ||||||
| ENV SCARF_NO_ANALYTICS true | #### Other models ######################################################### | ||||||
| ENV DO_NOT_TRACK true | ## whisper TTS model settings ## | ||||||
|  | ENV WHISPER_MODEL="base" \ | ||||||
|  |     WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" | ||||||
| 
 | 
 | ||||||
| # Use locally bundled version of the LiteLLM cost map json | ## RAG Embedding model settings ## | ||||||
| # to avoid repetitive startup connections | ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \ | ||||||
| ENV LITELLM_LOCAL_MODEL_COST_MAP="True" |     RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \ | ||||||
| 
 |     SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" | ||||||
| ######## Preloaded models ######## | #### Other models ########################################################## | ||||||
| # whisper TTS Settings |  | ||||||
| ENV WHISPER_MODEL="base" |  | ||||||
| ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" |  | ||||||
| 
 |  | ||||||
| # RAG Embedding Model Settings |  | ||||||
| # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers |  | ||||||
| # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard  |  | ||||||
| # for better persormance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB) |  | ||||||
| # IMPORTANT: If you change the default model (all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them. |  | ||||||
| ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" |  | ||||||
| # device type for whisper tts and embbeding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance |  | ||||||
| ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" |  | ||||||
| ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" |  | ||||||
| ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR |  | ||||||
| 
 |  | ||||||
| ######## Preloaded models ######## |  | ||||||
| 
 | 
 | ||||||
| WORKDIR /app/backend | WORKDIR /app/backend | ||||||
| 
 |  | ||||||
| # install python dependencies | # install python dependencies | ||||||
| COPY ./backend/requirements.txt ./requirements.txt | COPY ./backend/requirements.txt ./requirements.txt | ||||||
| 
 | 
 | ||||||
| RUN apt-get update && apt-get install ffmpeg libsm6 libxext6  -y | RUN if [ "$USE_CUDA" = "true" ]; then \ | ||||||
|  |         # If you use CUDA the whisper and embedding models will be downloaded on first use | ||||||
|  |         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ | ||||||
|  |         pip3 install -r requirements.txt --no-cache-dir && \ | ||||||
|  |         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ | ||||||
|  |         python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ | ||||||
|  |     else \ | ||||||
|  |         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ | ||||||
|  |         pip3 install -r requirements.txt --no-cache-dir && \ | ||||||
|  |         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ | ||||||
|  |         python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ | ||||||
|  |     fi | ||||||
| 
 | 
 | ||||||
| RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir |  | ||||||
| RUN pip3 install -r requirements.txt --no-cache-dir |  | ||||||
| 
 | 
 | ||||||
| # Install pandoc and netcat | RUN if [ "$USE_OLLAMA" = "true" ]; then \ | ||||||
| # RUN python -c "import pypandoc; pypandoc.download_pandoc()" |         apt-get update && \ | ||||||
| RUN apt-get update \ |         # Install pandoc and netcat | ||||||
|     && apt-get install -y pandoc netcat-openbsd \ |         apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ | ||||||
|     && rm -rf /var/lib/apt/lists/* |         # for RAG OCR | ||||||
|  |         apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ | ||||||
|  |         # install helper tools | ||||||
|  |         apt-get install -y --no-install-recommends curl && \ | ||||||
|  |         # install ollama | ||||||
|  |         curl -fsSL https://ollama.com/install.sh | sh && \ | ||||||
|  |         # cleanup | ||||||
|  |         rm -rf /var/lib/apt/lists/*; \ | ||||||
|  |     else \ | ||||||
|  |         apt-get update && \ | ||||||
|  |         # Install pandoc and netcat | ||||||
|  |         apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ | ||||||
|  |         # for RAG OCR | ||||||
|  |         apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ | ||||||
|  |         # cleanup | ||||||
|  |         rm -rf /var/lib/apt/lists/*; \ | ||||||
|  |     fi | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| # preload embedding model |  | ||||||
| RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])" |  | ||||||
| # preload tts model |  | ||||||
| RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" |  | ||||||
| 
 | 
 | ||||||
| # copy embedding weight from build | # copy embedding weight from build | ||||||
| RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | # RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||||
| COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx | # COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx | ||||||
| 
 | 
 | ||||||
| # copy built frontend files | # copy built frontend files | ||||||
| COPY --from=build /app/build /app/build | COPY --from=build /app/build /app/build | ||||||
|  | @ -86,4 +115,6 @@ COPY --from=build /app/package.json /app/package.json | ||||||
| # copy backend files | # copy backend files | ||||||
| COPY ./backend . | COPY ./backend . | ||||||
| 
 | 
 | ||||||
| CMD [ "bash", "start.sh"] | EXPOSE 8080 | ||||||
|  | 
 | ||||||
|  | CMD [ "bash", "start.sh"] | ||||||
							
								
								
									
										59
									
								
								README.md
									
										
									
									
									
								
							
							
						
						
									
										59
									
								
								README.md
									
										
									
									
									
								
							|  | @ -113,6 +113,65 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open | ||||||
| 
 | 
 | ||||||
| - After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄 | - After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄 | ||||||
| 
 | 
 | ||||||
|  | - **If you want to customize your build with additional args**, use these commands: | ||||||
|  | 
 | ||||||
|  |   > [!NOTE]   | ||||||
|  |   > If you only want to use Open WebUI with Ollama included or CUDA acceleration, it's recommended to use our official images with the tags :cuda or :ollama. | ||||||
|  |   > If you want a combination of both or more customisation options like a different embedding model and/or CUDA version you need to build the image yourself following the instructions below. | ||||||
|  | 
 | ||||||
|  |   **For the build:** | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |   docker build -t open-webui . | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   Optional build ARGS (use them in the docker build command below if needed): | ||||||
|  | 
 | ||||||
|  |       e.g. | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |   --build-arg="USE_EMBEDDING_MODEL=intfloat/multilingual-e5-large" | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   For "intfloat/multilingual-e5-large" custom embedding model (default is all-MiniLM-L6-v2), only works with [sentence transformer models](https://huggingface.co/models?library=sentence-transformers). Current [Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) of embedding models. | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |   --build-arg="USE_OLLAMA=true" | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   For including ollama in the image. | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |   --build-arg="USE_CUDA=true" | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   To use CUDA acceleration for the embedding and whisper models. | ||||||
|  | 
 | ||||||
|  |   > [!NOTE] | ||||||
|  |   > You need to install the [Nvidia CUDA container toolkit](https://docs.nvidia.com/dgx/nvidia-container-runtime-upgrade/) on your machine to be able to set CUDA as the Docker engine. Only works with Linux - use WSL for Windows! | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |   --build-arg="USE_CUDA_VER=cu117" | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   For CUDA 11 (default is CUDA 12) | ||||||
|  | 
 | ||||||
|  |   **To run the image:** | ||||||
|  | 
 | ||||||
|  |   - **If you DID NOT use the USE_CUDA=true build ARG**, use this command: | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |     docker run -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   - **If you DID use the USE_CUDA=true build ARG**, use this command: | ||||||
|  | 
 | ||||||
|  |   ```bash | ||||||
|  |     docker run --gpus all -d -p 3000:8080 -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main | ||||||
|  |   ``` | ||||||
|  | 
 | ||||||
|  |   - After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄 | ||||||
|  | 
 | ||||||
| #### Open WebUI: Server Connection Error | #### Open WebUI: Server Connection Error | ||||||
| 
 | 
 | ||||||
| If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`. | If you're experiencing connection issues, it’s often due to the WebUI docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) inside the container . Use the `--network=host` flag in your docker command to resolve this. Note that the port changes from 3000 to 8080, resulting in the link: `http://localhost:8080`. | ||||||
|  |  | ||||||
|  | @ -28,6 +28,7 @@ from config import ( | ||||||
|     UPLOAD_DIR, |     UPLOAD_DIR, | ||||||
|     WHISPER_MODEL, |     WHISPER_MODEL, | ||||||
|     WHISPER_MODEL_DIR, |     WHISPER_MODEL_DIR, | ||||||
|  |     DEVICE_TYPE, | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| log = logging.getLogger(__name__) | log = logging.getLogger(__name__) | ||||||
|  | @ -42,6 +43,10 @@ app.add_middleware( | ||||||
|     allow_headers=["*"], |     allow_headers=["*"], | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | # setting device type for whisper model | ||||||
|  | whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" | ||||||
|  | log.info(f"whisper_device_type: {whisper_device_type}") | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| @app.post("/transcribe") | @app.post("/transcribe") | ||||||
| def transcribe( | def transcribe( | ||||||
|  | @ -66,7 +71,7 @@ def transcribe( | ||||||
| 
 | 
 | ||||||
|         model = WhisperModel( |         model = WhisperModel( | ||||||
|             WHISPER_MODEL, |             WHISPER_MODEL, | ||||||
|             device="auto", |             device=whisper_device_type, | ||||||
|             compute_type="int8", |             compute_type="int8", | ||||||
|             download_root=WHISPER_MODEL_DIR, |             download_root=WHISPER_MODEL_DIR, | ||||||
|         ) |         ) | ||||||
|  |  | ||||||
|  | @ -59,7 +59,7 @@ from config import ( | ||||||
|     UPLOAD_DIR, |     UPLOAD_DIR, | ||||||
|     DOCS_DIR, |     DOCS_DIR, | ||||||
|     RAG_EMBEDDING_MODEL, |     RAG_EMBEDDING_MODEL, | ||||||
|     RAG_EMBEDDING_MODEL_DEVICE_TYPE, |     DEVICE_TYPE, | ||||||
|     CHROMA_CLIENT, |     CHROMA_CLIENT, | ||||||
|     CHUNK_SIZE, |     CHUNK_SIZE, | ||||||
|     CHUNK_OVERLAP, |     CHUNK_OVERLAP, | ||||||
|  | @ -71,15 +71,6 @@ from constants import ERROR_MESSAGES | ||||||
| log = logging.getLogger(__name__) | log = logging.getLogger(__name__) | ||||||
| log.setLevel(SRC_LOG_LEVELS["RAG"]) | log.setLevel(SRC_LOG_LEVELS["RAG"]) | ||||||
| 
 | 
 | ||||||
| # |  | ||||||
| # if RAG_EMBEDDING_MODEL: |  | ||||||
| #    sentence_transformer_ef = SentenceTransformer( |  | ||||||
| #        model_name_or_path=RAG_EMBEDDING_MODEL, |  | ||||||
| #        cache_folder=RAG_EMBEDDING_MODEL_DIR, |  | ||||||
| #        device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, |  | ||||||
| #    ) |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| app = FastAPI() | app = FastAPI() | ||||||
| 
 | 
 | ||||||
| app.state.PDF_EXTRACT_IMAGES = False | app.state.PDF_EXTRACT_IMAGES = False | ||||||
|  | @ -92,7 +83,7 @@ app.state.TOP_K = 4 | ||||||
| app.state.sentence_transformer_ef = ( | app.state.sentence_transformer_ef = ( | ||||||
|     embedding_functions.SentenceTransformerEmbeddingFunction( |     embedding_functions.SentenceTransformerEmbeddingFunction( | ||||||
|         model_name=app.state.RAG_EMBEDDING_MODEL, |         model_name=app.state.RAG_EMBEDDING_MODEL, | ||||||
|         device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, |         device=DEVICE_TYPE, | ||||||
|     ) |     ) | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
|  | @ -147,10 +138,9 @@ async def update_embedding_model( | ||||||
|     app.state.sentence_transformer_ef = ( |     app.state.sentence_transformer_ef = ( | ||||||
|         embedding_functions.SentenceTransformerEmbeddingFunction( |         embedding_functions.SentenceTransformerEmbeddingFunction( | ||||||
|             model_name=app.state.RAG_EMBEDDING_MODEL, |             model_name=app.state.RAG_EMBEDDING_MODEL, | ||||||
|             device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, |             device=DEVICE_TYPE, | ||||||
|         ) |         ) | ||||||
|     ) |     ) | ||||||
| 
 |  | ||||||
|     return { |     return { | ||||||
|         "status": True, |         "status": True, | ||||||
|         "embedding_model": app.state.RAG_EMBEDDING_MODEL, |         "embedding_model": app.state.RAG_EMBEDDING_MODEL, | ||||||
|  |  | ||||||
|  | @ -257,6 +257,7 @@ OLLAMA_API_BASE_URL = os.environ.get( | ||||||
| 
 | 
 | ||||||
| OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") | OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") | ||||||
| K8S_FLAG = os.environ.get("K8S_FLAG", "") | K8S_FLAG = os.environ.get("K8S_FLAG", "") | ||||||
|  | USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false") | ||||||
| 
 | 
 | ||||||
| if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": | if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": | ||||||
|     OLLAMA_BASE_URL = ( |     OLLAMA_BASE_URL = ( | ||||||
|  | @ -266,9 +267,13 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
| if ENV == "prod": | if ENV == "prod": | ||||||
|     if OLLAMA_BASE_URL == "/ollama": |     if OLLAMA_BASE_URL == "/ollama" and not K8S_FLAG: | ||||||
|         OLLAMA_BASE_URL = "http://host.docker.internal:11434" |         if USE_OLLAMA_DOCKER.lower() == "true": | ||||||
| 
 |             # if you use all-in-one docker container (Open WebUI + Ollama) | ||||||
|  |             # with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434 | ||||||
|  |             OLLAMA_BASE_URL = "http://localhost:11434" | ||||||
|  |         else: | ||||||
|  |             OLLAMA_BASE_URL = "http://host.docker.internal:11434" | ||||||
|     elif K8S_FLAG: |     elif K8S_FLAG: | ||||||
|         OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434" |         OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434" | ||||||
| 
 | 
 | ||||||
|  | @ -391,10 +396,16 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": | ||||||
| CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" | CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" | ||||||
| # this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) | # this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) | ||||||
| RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") | RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") | ||||||
|  | log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"), | ||||||
| # device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance | # device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance | ||||||
| RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get( | USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false") | ||||||
|     "RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu" | 
 | ||||||
| ) | if USE_CUDA.lower() == "true": | ||||||
|  |     DEVICE_TYPE = "cuda" | ||||||
|  | else: | ||||||
|  |     DEVICE_TYPE = "cpu" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| CHROMA_CLIENT = chromadb.PersistentClient( | CHROMA_CLIENT = chromadb.PersistentClient( | ||||||
|     path=CHROMA_DATA_PATH, |     path=CHROMA_DATA_PATH, | ||||||
|     settings=Settings(allow_reset=True, anonymized_telemetry=False), |     settings=Settings(allow_reset=True, anonymized_telemetry=False), | ||||||
|  |  | ||||||
|  | @ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key | ||||||
| 
 | 
 | ||||||
| PORT="${PORT:-8080}" | PORT="${PORT:-8080}" | ||||||
| if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then | if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then | ||||||
|   echo No WEBUI_SECRET_KEY provided |   echo "No WEBUI_SECRET_KEY provided" | ||||||
| 
 | 
 | ||||||
|   if ! [ -e "$KEY_FILE" ]; then |   if ! [ -e "$KEY_FILE" ]; then | ||||||
|     echo Generating WEBUI_SECRET_KEY |     echo "Generating WEBUI_SECRET_KEY" | ||||||
|     # Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one. |     # Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one. | ||||||
|     echo $(head -c 12 /dev/random | base64) > $KEY_FILE |     echo $(head -c 12 /dev/random | base64) > "$KEY_FILE" | ||||||
|   fi |   fi | ||||||
| 
 | 
 | ||||||
|   echo Loading WEBUI_SECRET_KEY from $KEY_FILE |   echo "Loading WEBUI_SECRET_KEY from $KEY_FILE" | ||||||
|   WEBUI_SECRET_KEY=`cat $KEY_FILE` |   WEBUI_SECRET_KEY=$(cat "$KEY_FILE") | ||||||
| fi | fi | ||||||
| 
 | 
 | ||||||
| WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' | if [ "$USE_OLLAMA_DOCKER" = "true" ]; then | ||||||
|  |     echo "USE_OLLAMA is set to true, starting ollama serve." | ||||||
|  |     ollama serve & | ||||||
|  | fi | ||||||
|  | 
 | ||||||
|  | if [ "$USE_CUDA_DOCKER" = "true" ]; then | ||||||
|  |   echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries." | ||||||
|  |   export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib" | ||||||
|  | fi | ||||||
|  | 
 | ||||||
|  | WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Jaeryang Baek
						Timothy Jaeryang Baek