diff --git a/.dockerignore b/.dockerignore
index 58cf1f0f..e28863bf 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -7,7 +7,6 @@ node_modules
/package
.env
.env.*
-!.env.example
vite.config.js.timestamp-*
vite.config.ts.timestamp-*
__pycache__
diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..3d2aafc0
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,12 @@
+# Ollama URL for the backend to connect
+# The path '/ollama' will be redirected to the specified backend URL
+OLLAMA_BASE_URL='http://localhost:11434'
+
+OPENAI_API_BASE_URL=''
+OPENAI_API_KEY=''
+
+# AUTOMATIC1111_BASE_URL="http://localhost:7860"
+
+# DO NOT TRACK
+SCARF_NO_ANALYTICS=true
+DO_NOT_TRACK=true
\ No newline at end of file
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 5a85d087..43866613 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -32,7 +32,7 @@ assignees: ''
**Confirmation:**
- [ ] I have read and followed all the instructions provided in the README.md.
-- [ ] I have reviewed the troubleshooting.md document.
+- [ ] I am on the latest version of both Open WebUI and Ollama.
- [ ] I have included the browser console logs.
- [ ] I have included the Docker container logs.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000..4c4cfa3b
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,32 @@
+## Pull Request Checklist
+
+- [ ] **Description:** Briefly describe the changes in this pull request.
+- [ ] **Changelog:** Ensure a changelog entry following the format of [Keep a Changelog](https://keepachangelog.com/) is added at the bottom of the PR description.
+- [ ] **Documentation:** Have you updated relevant documentation?
+- [ ] **Dependencies:** Are there any new dependencies? Have you updated the dependency versions in the documentation?
+
+---
+
+## Description
+
+[Insert a brief description of the changes made in this pull request]
+
+---
+
+### Changelog Entry
+
+### Added
+
+- [List any new features or additions]
+
+### Fixed
+
+- [List any fixes or corrections]
+
+### Changed
+
+- [List any changes or updates]
+
+### Removed
+
+- [List any removed features or files]
diff --git a/.github/workflows/build-release.yml b/.github/workflows/build-release.yml
new file mode 100644
index 00000000..259f0c5f
--- /dev/null
+++ b/.github/workflows/build-release.yml
@@ -0,0 +1,59 @@
+name: Release
+
+on:
+ push:
+ branches:
+ - main # or whatever branch you want to use
+
+jobs:
+ release:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v2
+
+ - name: Check for changes in package.json
+ run: |
+ git diff --cached --diff-filter=d package.json || {
+ echo "No changes to package.json"
+ exit 1
+ }
+
+ - name: Get version number from package.json
+ id: get_version
+ run: |
+ VERSION=$(jq -r '.version' package.json)
+ echo "::set-output name=version::$VERSION"
+
+ - name: Extract latest CHANGELOG entry
+ id: changelog
+ run: |
+ CHANGELOG_CONTENT=$(awk 'BEGIN {print_section=0;} /^## \[/ {if (print_section == 0) {print_section=1;} else {exit;}} print_section {print;}' CHANGELOG.md)
+ CHANGELOG_ESCAPED=$(echo "$CHANGELOG_CONTENT" | sed ':a;N;$!ba;s/\n/%0A/g')
+ echo "Extracted latest release notes from CHANGELOG.md:"
+ echo -e "$CHANGELOG_CONTENT"
+ echo "::set-output name=content::$CHANGELOG_ESCAPED"
+
+ - name: Create GitHub release
+ uses: actions/github-script@v5
+ with:
+ github-token: ${{ secrets.GITHUB_TOKEN }}
+ script: |
+ const changelog = `${{ steps.changelog.outputs.content }}`;
+ const release = await github.rest.repos.createRelease({
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ tag_name: `v${{ steps.get_version.outputs.version }}`,
+ name: `v${{ steps.get_version.outputs.version }}`,
+ body: changelog,
+ })
+ console.log(`Created release ${release.data.html_url}`)
+
+ - name: Upload package to GitHub release
+ uses: actions/upload-artifact@v3
+ with:
+ name: package
+ path: .
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/docker-build.yaml b/.github/workflows/docker-build.yaml
index de32dbeb..bb71de8b 100644
--- a/.github/workflows/docker-build.yaml
+++ b/.github/workflows/docker-build.yaml
@@ -52,6 +52,7 @@ jobs:
type=ref,event=tag
type=sha,prefix=git-
type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
flavor: |
latest=${{ github.ref == 'refs/heads/main' }}
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 00000000..3956e566
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,211 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [0.1.114] - 2024-03-20
+
+### Added
+
+- **🔗 Webhook Integration**: Now you can subscribe to new user sign-up events via webhook. Simply navigate to the admin panel > admin settings > webhook URL.
+- **🛡️ Enhanced Model Filtering**: Alongside Ollama and OpenAI proxy model whitelisting, we've added model filtering functionality for the LiteLLM proxy.
+- **🌍 Expanded Language Support**: Spanish, Catalan, and Vietnamese languages are now available, with improvements made to others.
+
+### Fixed
+
+- **🔧 Input Field Spelling**: Resolved issue with spelling mistakes in input fields.
+- **🖊️ Light Mode Styling**: Fixed styling issue with light mode in document adding.
+
+### Changed
+
+- **🔄 Language Sorting**: Languages are now sorted alphabetically by their code for improved organization.
+
+## [0.1.113] - 2024-03-18
+
+### Added
+
+- 🌍 **Localization**: You can now change the UI language in Settings > General. We support Ukrainian, German, Farsi (Persian), Traditional and Simplified Chinese, and French translations. You can help us translate the UI into your language! More info in our [CONTRIBUTING.md](https://github.com/open-webui/open-webui/blob/main/docs/CONTRIBUTING.md#-translations-and-internationalization).
+- 🎨 **System-wide Theme**: Introducing a new system-wide theme for enhanced visual experience.
+
+### Fixed
+
+- 🌑 **Dark Background on Select Fields**: Improved readability by adding a dark background to select fields, addressing issues on certain browsers/devices.
+- **Multiple OPENAI_API_BASE_URLS Issue**: Resolved issue where multiple base URLs caused conflicts when one wasn't functioning.
+- **RAG Encoding Issue**: Fixed encoding problem in RAG.
+- **npm Audit Fix**: Addressed npm audit findings.
+- **Reduced Scroll Threshold**: Improved auto-scroll experience by reducing the scroll threshold from 50px to 5px.
+
+### Changed
+
+- 🔄 **Sidebar UI Update**: Updated sidebar UI to feature a chat menu dropdown, replacing two icons for improved navigation.
+
+## [0.1.112] - 2024-03-15
+
+### Fixed
+
+- 🗨️ Resolved chat malfunction after image generation.
+- 🎨 Fixed various RAG issues.
+- 🧪 Rectified experimental broken GGUF upload logic.
+
+## [0.1.111] - 2024-03-10
+
+### Added
+
+- 🛡️ **Model Whitelisting**: Admins now have the ability to whitelist models for users with the 'user' role.
+- 🔄 **Update All Models**: Added a convenient button to update all models at once.
+- 📄 **Toggle PDF OCR**: Users can now toggle PDF OCR option for improved parsing performance.
+- 🎨 **DALL-E Integration**: Introduced DALL-E integration for image generation alongside automatic1111.
+- 🛠️ **RAG API Refactoring**: Refactored RAG logic and exposed its API, with additional documentation to follow.
+
+### Fixed
+
+- 🔒 **Max Token Settings**: Added max token settings for anthropic/claude-3-sonnet-20240229 (Issue #1094).
+- 🔧 **Misalignment Issue**: Corrected misalignment of Edit and Delete Icons when Chat Title is Empty (Issue #1104).
+- 🔄 **Context Loss Fix**: Resolved RAG losing context on model response regeneration with Groq models via API key (Issue #1105).
+- 📁 **File Handling Bug**: Addressed File Not Found Notification when Dropping a Conversation Element (Issue #1098).
+- 🖱️ **Dragged File Styling**: Fixed dragged file layover styling issue.
+
+## [0.1.110] - 2024-03-06
+
+### Added
+
+- **🌐 Multiple OpenAI Servers Support**: Enjoy seamless integration with multiple OpenAI-compatible APIs, now supported natively.
+
+### Fixed
+
+- **🔍 OCR Issue**: Resolved PDF parsing issue caused by OCR malfunction.
+- **🚫 RAG Issue**: Fixed the RAG functionality, ensuring it operates smoothly.
+- **📄 "Add Docs" Model Button**: Addressed the non-functional behavior of the "Add Docs" model button.
+
+## [0.1.109] - 2024-03-06
+
+### Added
+
+- **🔄 Multiple Ollama Servers Support**: Enjoy enhanced scalability and performance with support for multiple Ollama servers in a single WebUI. Load balancing features are now available, providing improved efficiency (#788, #278).
+- **🔧 Support for Claude 3 and Gemini**: Responding to user requests, we've expanded our toolset to include Claude 3 and Gemini, offering a wider range of functionalities within our platform (#1064).
+- **🔍 OCR Functionality for PDF Loader**: We've augmented our PDF loader with Optical Character Recognition (OCR) capabilities. Now, extract text from scanned documents and images within PDFs, broadening the scope of content processing (#1050).
+
+### Fixed
+
+- **🛠️ RAG Collection**: Implemented a dynamic mechanism to recreate RAG collections, ensuring users have up-to-date and accurate data (#1031).
+- **📝 User Agent Headers**: Fixed issue of RAG web requests being sent with empty user_agent headers, reducing rejections from certain websites. Realistic headers are now utilized for these requests (#1024).
+- **⏹️ Playground Cancel Functionality**: Introducing a new "Cancel" option for stopping Ollama generation in the Playground, enhancing user control and usability (#1006).
+- **🔤 Typographical Error in 'ASSISTANT' Field**: Corrected a typographical error in the 'ASSISTANT' field within the GGUF model upload template for accuracy and consistency (#1061).
+
+### Changed
+
+- **🔄 Refactored Message Deletion Logic**: Streamlined message deletion process for improved efficiency and user experience, simplifying interactions within the platform (#1004).
+- **⚠️ Deprecation of `OLLAMA_API_BASE_URL`**: Deprecated `OLLAMA_API_BASE_URL` environment variable; recommend using `OLLAMA_BASE_URL` instead. Refer to our documentation for further details.
+
+## [0.1.108] - 2024-03-02
+
+### Added
+
+- **🎮 Playground Feature (Beta)**: Explore the full potential of the raw API through an intuitive UI with our new playground feature, accessible to admins. Simply click on the bottom name area of the sidebar to access it. The playground feature offers two modes: text completion (notebook) and chat completion. As it's in beta, please report any issues you encounter.
+- **🛠️ Direct Database Download for Admins**: Admins can now download the database directly from the WebUI via the admin settings.
+- **🎨 Additional RAG Settings**: Customize your RAG process with the ability to edit the TOP K value. Navigate to Documents > Settings > General to make changes.
+- **🖥️ UI Improvements**: Tooltips now available in the input area and sidebar handle. More tooltips will be added across other parts of the UI.
+
+### Fixed
+
+- Resolved input autofocus issue on mobile when the sidebar is open, making it easier to use.
+- Corrected numbered list display issue in Safari (#963).
+- Restricted user ability to delete chats without proper permissions (#993).
+
+### Changed
+
+- **Simplified Ollama Settings**: Ollama settings now don't require the `/api` suffix. You can now utilize the Ollama base URL directly, e.g., `http://localhost:11434`. Also, an `OLLAMA_BASE_URL` environment variable has been added.
+- **Database Renaming**: Starting from this release, `ollama.db` will be automatically renamed to `webui.db`.
+
+## [0.1.107] - 2024-03-01
+
+### Added
+
+- **🚀 Makefile and LLM Update Script**: Included Makefile and a script for LLM updates in the repository.
+
+### Fixed
+
+- Corrected issue where links in the settings modal didn't appear clickable (#960).
+- Fixed problem with web UI port not taking effect due to incorrect environment variable name in run-compose.sh (#996).
+- Enhanced user experience by displaying chat in browser title and enabling automatic scrolling to the bottom (#992).
+
+### Changed
+
+- Upgraded toast library from `svelte-french-toast` to `svelte-sonner` for a more polished UI.
+- Enhanced accessibility with the addition of dark mode on the authentication page.
+
+## [0.1.106] - 2024-02-27
+
+### Added
+
+- **🎯 Auto-focus Feature**: The input area now automatically focuses when initiating or opening a chat conversation.
+
+### Fixed
+
+- Corrected typo from "HuggingFace" to "Hugging Face" (Issue #924).
+- Resolved bug causing errors in chat completion API calls to OpenAI due to missing "num_ctx" parameter (Issue #927).
+- Fixed issues preventing text editing, selection, and cursor retention in the input field (Issue #940).
+- Fixed a bug where defining an OpenAI-compatible API server using 'OPENAI_API_BASE_URL' containing 'openai' string resulted in hiding models not containing 'gpt' string from the model menu. (Issue #930)
+
+## [0.1.105] - 2024-02-25
+
+### Added
+
+- **📄 Document Selection**: Now you can select and delete multiple documents at once for easier management.
+
+### Changed
+
+- **🏷️ Document Pre-tagging**: Simply click the "+" button at the top, enter tag names in the popup window, or select from a list of existing tags. Then, upload files with the added tags for streamlined organization.
+
+## [0.1.104] - 2024-02-25
+
+### Added
+
+- **🔄 Check for Updates**: Keep your system current by checking for updates conveniently located in Settings > About.
+- **🗑️ Automatic Tag Deletion**: Unused tags on the sidebar will now be deleted automatically with just a click.
+
+### Changed
+
+- **🎨 Modernized Styling**: Enjoy a refreshed look with updated styling for a more contemporary experience.
+
+## [0.1.103] - 2024-02-25
+
+### Added
+
+- **🔗 Built-in LiteLLM Proxy**: Now includes LiteLLM proxy within Open WebUI for enhanced functionality.
+
+ - Easily integrate existing LiteLLM configurations using `-v /path/to/config.yaml:/app/backend/data/litellm/config.yaml` flag.
+ - When utilizing Docker container to run Open WebUI, ensure connections to localhost use `host.docker.internal`.
+
+- **🖼️ Image Generation Enhancements**: Introducing Advanced Settings with Image Preview Feature.
+ - Customize image generation by setting the number of steps; defaults to A1111 value.
+
+### Fixed
+
+- Resolved issue with RAG scan halting document loading upon encountering unsupported MIME types or exceptions (Issue #866).
+
+### Changed
+
+- Ollama is no longer required to run Open WebUI.
+- Access our comprehensive documentation at [Open WebUI Documentation](https://docs.openwebui.com/).
+
+## [0.1.102] - 2024-02-22
+
+### Added
+
+- **🖼️ Image Generation**: Generate Images using the AUTOMATIC1111/stable-diffusion-webui API. You can set this up in Settings > Images.
+- **📝 Change title generation prompt**: Change the prompt used to generate titles for your chats. You can set this up in the Settings > Interface.
+- **🤖 Change embedding model**: Change the embedding model used to generate embeddings for your chats in the Dockerfile. Use any sentence transformer model from huggingface.co.
+- **📢 CHANGELOG.md/Popup**: This popup will show you the latest changes.
+
+## [0.1.101] - 2024-02-22
+
+### Fixed
+
+- LaTeX output formatting issue (#828)
+
+### Changed
+
+- Instead of having the previous 1.0.0-alpha.101, we switched to semantic versioning as a way to respect global conventions.
diff --git a/Dockerfile b/Dockerfile
index 520c2964..de501838 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -20,7 +20,7 @@ FROM python:3.11-slim-bookworm as base
ENV ENV=prod
ENV PORT ""
-ENV OLLAMA_API_BASE_URL "/ollama/api"
+ENV OLLAMA_BASE_URL "/ollama"
ENV OPENAI_API_BASE_URL ""
ENV OPENAI_API_KEY ""
@@ -30,15 +30,31 @@ ENV WEBUI_SECRET_KEY ""
ENV SCARF_NO_ANALYTICS true
ENV DO_NOT_TRACK true
-#Whisper TTS Settings
+######## Preloaded models ########
+# whisper TTS Settings
ENV WHISPER_MODEL="base"
ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models"
+# RAG Embedding Model Settings
+# any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers
+# Leaderboard: https://huggingface.co/spaces/mteb/leaderboard
+# for better performance and multilanguage support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
+# IMPORTANT: If you switch away from the default model (all-MiniLM-L6-v2), or back to it, you won't be able to use RAG Chat with documents previously loaded in the WebUI until you re-embed them.
+ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2"
+# device type for whisper tts and embedding models - "cpu" (default), "cuda" (nvidia gpu and CUDA required) or "mps" (apple silicon) - choosing this right can lead to better performance
+ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu"
+ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models"
+ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR
+
+######## Preloaded models ########
+
WORKDIR /app/backend
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt
+RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
+
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
@@ -48,9 +64,10 @@ RUN apt-get update \
&& apt-get install -y pandoc netcat-openbsd \
&& rm -rf /var/lib/apt/lists/*
-# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
-RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
-
+# preload embedding model
+RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])"
+# preload tts model
+RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"
# copy embedding weight from build
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
@@ -58,8 +75,10 @@ COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onn
# copy built frontend files
COPY --from=build /app/build /app/build
+COPY --from=build /app/CHANGELOG.md /app/CHANGELOG.md
+COPY --from=build /app/package.json /app/package.json
# copy backend files
COPY ./backend .
-CMD [ "bash", "start.sh"]
\ No newline at end of file
+CMD [ "bash", "start.sh"]
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..cbcc41d9
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,25 @@
+install:
+ @docker-compose up -d
+
+remove:
+ @chmod +x confirm_remove.sh
+ @./confirm_remove.sh
+
+
+start:
+ @docker-compose start
+
+stop:
+ @docker-compose stop
+
+update:
+ # Calls the LLM update script
+ chmod +x update_ollama_models.sh
+ @./update_ollama_models.sh
+ @git pull
+ @docker-compose down
+ # Make sure the ollama-webui container is stopped before rebuilding
+ @docker stop open-webui || true
+ @docker-compose up --build -d
+ @docker-compose start
+
diff --git a/README.md b/README.md
index a1d089e7..e2ee284e 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,20 @@
# Open WebUI (Formerly Ollama WebUI) 👋
-![GitHub stars](https://img.shields.io/github/stars/ollama-webui/ollama-webui?style=social)
-![GitHub forks](https://img.shields.io/github/forks/ollama-webui/ollama-webui?style=social)
-![GitHub watchers](https://img.shields.io/github/watchers/ollama-webui/ollama-webui?style=social)
-![GitHub repo size](https://img.shields.io/github/repo-size/ollama-webui/ollama-webui)
-![GitHub language count](https://img.shields.io/github/languages/count/ollama-webui/ollama-webui)
-![GitHub top language](https://img.shields.io/github/languages/top/ollama-webui/ollama-webui)
-![GitHub last commit](https://img.shields.io/github/last-commit/ollama-webui/ollama-webui?color=red)
+![GitHub stars](https://img.shields.io/github/stars/open-webui/open-webui?style=social)
+![GitHub forks](https://img.shields.io/github/forks/open-webui/open-webui?style=social)
+![GitHub watchers](https://img.shields.io/github/watchers/open-webui/open-webui?style=social)
+![GitHub repo size](https://img.shields.io/github/repo-size/open-webui/open-webui)
+![GitHub language count](https://img.shields.io/github/languages/count/open-webui/open-webui)
+![GitHub top language](https://img.shields.io/github/languages/top/open-webui/open-webui)
+![GitHub last commit](https://img.shields.io/github/last-commit/open-webui/open-webui?color=red)
![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Follama-webui%2Follama-wbui&count_bg=%2379C83D&title_bg=%23555555&icon=&icon_color=%23E7E7E7&title=hits&edge_flat=false)
[![Discord](https://img.shields.io/badge/Discord-Open_WebUI-blue?logo=discord&logoColor=white)](https://discord.gg/5rJgQTnV4s)
[![](https://img.shields.io/static/v1?label=Sponsor&message=%E2%9D%A4&logo=GitHub&color=%23fe8e86)](https://github.com/sponsors/tjbck)
-ChatGPT-Style Web Interface for Ollama 🦙
+Open WebUI is an extensible, feature-rich, and user-friendly self-hosted WebUI designed to operate entirely offline. It supports various LLM runners, including Ollama and OpenAI-compatible APIs. For more information, be sure to check out our [Open WebUI Documentation](https://docs.openwebui.com/).
![Open WebUI Demo](./demo.gif)
-Also check our sibling project, [Open WebUI Community](https://openwebui.com/), where you can discover, download, and explore customized Modelfiles for Ollama! 🦙🔍
-
## Features ⭐
- 🖥️ **Intuitive Interface**: Our chat interface takes inspiration from ChatGPT, ensuring a user-friendly experience.
@@ -55,8 +53,6 @@ Also check our sibling project, [Open WebUI Community](https://openwebui.com/),
- 💬 **Collaborative Chat**: Harness the collective intelligence of multiple models by seamlessly orchestrating group conversations. Use the `@` command to specify the model, enabling dynamic and diverse dialogues within your chat interface. Immerse yourself in the collective intelligence woven into your chat environment.
-- 🤝 **OpenAI API Integration**: Effortlessly integrate OpenAI-compatible API for versatile conversations alongside Ollama models. Customize the API Base URL to link with **LMStudio, Mistral, OpenRouter, and more**.
-
- 🔄 **Regeneration History Access**: Easily revisit and explore your entire regeneration history.
- 📜 **Chat History**: Effortlessly access and manage your conversation history.
@@ -67,66 +63,39 @@ Also check our sibling project, [Open WebUI Community](https://openwebui.com/),
- ⚙️ **Fine-Tuned Control with Advanced Parameters**: Gain a deeper level of control by adjusting parameters such as temperature and defining your system prompts to tailor the conversation to your specific preferences and needs.
+- 🎨🤖 **Image Generation Integration**: Seamlessly incorporate image generation capabilities using AUTOMATIC1111 API (local) and DALL-E, enriching your chat experience with dynamic visual content.
+
+- 🤝 **OpenAI API Integration**: Effortlessly integrate OpenAI-compatible API for versatile conversations alongside Ollama models. Customize the API Base URL to link with **LMStudio, Mistral, OpenRouter, and more**.
+
+- ✨ **Multiple OpenAI-Compatible API Support**: Seamlessly integrate and customize various OpenAI-compatible APIs, enhancing the versatility of your chat interactions.
+
- 🔗 **External Ollama Server Connection**: Seamlessly link to an external Ollama server hosted on a different address by configuring the environment variable.
+- 🔀 **Multiple Ollama Instance Load Balancing**: Effortlessly distribute chat requests across multiple Ollama instances for enhanced performance and reliability.
+
+- 👥 **Multi-User Management**: Easily oversee and administer users via our intuitive admin panel, streamlining user management processes.
+
- 🔐 **Role-Based Access Control (RBAC)**: Ensure secure access with restricted permissions; only authorized individuals can access your Ollama, and exclusive model creation/pulling rights are reserved for administrators.
- 🔒 **Backend Reverse Proxy Support**: Bolster security through direct communication between Open WebUI backend and Ollama. This key feature eliminates the need to expose Ollama over LAN. Requests made to the '/ollama/api' route from the web UI are seamlessly redirected to Ollama from the backend, enhancing overall system security.
+- 🌐🌍 **Multilingual Support**: Experience Open WebUI in your preferred language with our internationalization (i18n) support. Join us in expanding our supported languages! We're actively seeking contributors!
+
- 🌟 **Continuous Updates**: We are committed to improving Open WebUI with regular updates and new features.
## 🔗 Also Check Out Open WebUI Community!
-Don't forget to explore our sibling project, [Open WebUI Community](https://openwebui.com/), where you can discover, download, and explore customized Modelfiles. Open WebUI Community offers a wide range of exciting possibilities for enhancing your chat interactions with Ollama! 🚀
+Don't forget to explore our sibling project, [Open WebUI Community](https://openwebui.com/), where you can discover, download, and explore customized Modelfiles. Open WebUI Community offers a wide range of exciting possibilities for enhancing your chat interactions with Open WebUI! 🚀
## How to Install 🚀
-🌟 **Important Note on User Roles and Privacy:**
+> [!NOTE]
+> Please note that for certain Docker environments, additional configurations might be needed. If you encounter any connection issues, our detailed guide on [Open WebUI Documentation](https://docs.openwebui.com/) is ready to assist you.
-- **Admin Creation:** The very first account to sign up on the Open WebUI will be granted **Administrator privileges**. This account will have comprehensive control over the platform, including user management and system settings.
+### Quick Start with Docker 🐳
-- **User Registrations:** All subsequent users signing up will initially have their accounts set to **Pending** status by default. These accounts will require approval from the Administrator to gain access to the platform functionalities.
-
-- **Privacy and Data Security:** We prioritize your privacy and data security above all. Please be reassured that all data entered into the Open WebUI is stored locally on your device. Our system is designed to be privacy-first, ensuring that no external requests are made, and your data does not leave your local environment. We are committed to maintaining the highest standards of data privacy and security, ensuring that your information remains confidential and under your control.
-
-### Steps to Install Open WebUI
-
-#### Before You Begin
-
-1. **Installing Docker:**
-
- - **For Windows and Mac Users:**
-
- - Download Docker Desktop from [Docker's official website](https://www.docker.com/products/docker-desktop).
- - Follow the installation instructions provided on the website. After installation, open Docker Desktop to ensure it's running properly.
-
- - **For Ubuntu and Other Linux Users:**
- - Open your terminal.
- - Set up your Docker apt repository according to the [Docker documentation](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository)
- - Update your package index:
- ```bash
- sudo apt-get update
- ```
- - Install Docker using the following command:
- ```bash
- sudo apt-get install docker-ce docker-ce-cli containerd.io
- ```
- - Verify the Docker installation with:
- ```bash
- sudo docker run hello-world
- ```
- This command downloads a test image and runs it in a container, which prints an informational message.
-
-2. **Ensure You Have the Latest Version of Ollama:**
-
- - Download the latest version from [https://ollama.com/](https://ollama.com/).
-
-3. **Verify Ollama Installation:**
- - After installing Ollama, check if it's working by visiting [http://127.0.0.1:11434/](http://127.0.0.1:11434/) in your web browser. Remember, the port number might be different for you.
-
-#### Installing with Docker 🐳
-
-- **Important:** When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` in your Docker command. This step is crucial as it ensures your database is properly mounted and prevents any loss of data.
+> [!IMPORTANT]
+> When using Docker to install Open WebUI, make sure to include the `-v open-webui:/app/backend/data` in your Docker command. This step is crucial as it ensures your database is properly mounted and prevents any loss of data.
- **If Ollama is on your computer**, use this command:
@@ -134,158 +103,51 @@ Don't forget to explore our sibling project, [Open WebUI Community](https://open
docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
-- **To build the container yourself**, follow these steps:
+- **If Ollama is on a different server**, use this command:
+
+- To connect to Ollama on another server, change the `OLLAMA_BASE_URL` to the server's URL:
```bash
- docker build -t open-webui .
- docker run -d -p 3000:8080 --add-host=host.docker.internal:host-gateway -v open-webui:/app/backend/data --name open-webui --restart always open-webui
+ docker run -d -p 3000:8080 -e OLLAMA_BASE_URL=https://example.com -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
-- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000).
+- After installation, you can access Open WebUI at [http://localhost:3000](http://localhost:3000). Enjoy! 😄
-#### Using Ollama on a Different Server
+#### Open WebUI: Server Connection Error
-- To connect to Ollama on another server, change the `OLLAMA_API_BASE_URL` to the server's URL:
+If you're experiencing connection issues, it’s often due to the WebUI Docker container not being able to reach the Ollama server at 127.0.0.1:11434 (host.docker.internal:11434) from inside the container. Use the `--network=host` flag in your Docker command to resolve this. Note that with host networking the port changes from 3000 to 8080, so the link becomes `http://localhost:8080`.
- ```bash
- docker run -d -p 3000:8080 -e OLLAMA_API_BASE_URL=https://example.com/api -v open-webui:/app/backend/data --name open-webui --restart always ghcr.io/open-webui/open-webui:main
- ```
-
- Or for a self-built container:
-
- ```bash
- docker build -t open-webui .
- docker run -d -p 3000:8080 -e OLLAMA_API_BASE_URL=https://example.com/api -v open-webui:/app/backend/data --name open-webui --restart always open-webui
- ```
-
-### Installing Ollama and Open WebUI Together
-
-#### Using Docker Compose
-
-- If you don't have Ollama yet, use Docker Compose for easy installation. Run this command:
-
- ```bash
- docker compose up -d --build
- ```
-
-- **For GPU Support:** Use an additional Docker Compose file:
-
- ```bash
- docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d --build
- ```
-
-- **To Expose Ollama API:** Use another Docker Compose file:
-
- ```bash
- docker compose -f docker-compose.yaml -f docker-compose.api.yaml up -d --build
- ```
-
-#### Using `run-compose.sh` Script (Linux or Docker-Enabled WSL2 on Windows)
-
-- Give execute permission to the script:
-
- ```bash
- chmod +x run-compose.sh
- ```
-
-- For CPU-only container:
-
- ```bash
- ./run-compose.sh
- ```
-
-- For GPU support (read the note about GPU compatibility):
-
- ```bash
- ./run-compose.sh --enable-gpu
- ```
-
-- To build the latest local version, add `--build`:
-
- ```bash
- ./run-compose.sh --enable-gpu --build
- ```
-
-### Alternative Installation Methods
-
-For other ways to install, like using Kustomize or Helm, check out [INSTALLATION.md](/INSTALLATION.md). Join our [Open WebUI Discord community](https://discord.gg/5rJgQTnV4s) for more help and information.
-
-### Updating your Docker Installation
-
-In case you want to update your local Docker installation to the latest version, you can do it performing the following actions:
+**Example Docker Command**:
```bash
-docker rm -f open-webui
-docker pull ghcr.io/open-webui/open-webui:main
-[insert command you used to install]
+docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
-In the last line, you need to use the very same command you used to install (local install, remote server, etc.)
+### Other Installation Methods
-## How to Install Without Docker
+We offer various installation alternatives, including non-Docker methods, Docker Compose, Kustomize, and Helm. Visit our [Open WebUI Documentation](https://docs.openwebui.com/getting-started/) or join our [Discord community](https://discord.gg/5rJgQTnV4s) for comprehensive guidance.
-While we strongly recommend using our convenient Docker container installation for optimal support, we understand that some situations may require a non-Docker setup, especially for development purposes. Please note that non-Docker installations are not officially supported, and you might need to troubleshoot on your own.
+### Troubleshooting
-### Project Components
+Encountering connection issues? Our [Open WebUI Documentation](https://docs.openwebui.com/getting-started/troubleshooting/) has got you covered. For further assistance and to join our vibrant community, visit the [Open WebUI Discord](https://discord.gg/5rJgQTnV4s).
-The Open WebUI consists of two primary components: the frontend and the backend (which serves as a reverse proxy, handling static frontend files, and additional features). Both need to be running concurrently for the development environment.
+### Keeping Your Docker Installation Up-to-Date
-> [!IMPORTANT]
-> The backend is required for proper functionality
+In case you want to update your local Docker installation to the latest version, you can do it with [Watchtower](https://containrrr.dev/watchtower/):
-### Requirements 📦
-
-- 🐰 [Bun](https://bun.sh) >= 1.0.21 or 🐢 [Node.js](https://nodejs.org/en) >= 20.10
-- 🐍 [Python](https://python.org) >= 3.11
-
-### Build and Install 🛠️
-
-Run the following commands to install:
-
-```sh
-git clone https://github.com/open-webui/open-webui.git
-cd open-webui/
-
-# Copying required .env file
-cp -RPp example.env .env
-
-# Building Frontend Using Node
-npm i
-npm run build
-
-# or Building Frontend Using Bun
-# bun install
-# bun run build
-
-# Serving Frontend with the Backend
-cd ./backend
-pip install -r requirements.txt -U
-sh start.sh
+```bash
+docker run --rm --volume /var/run/docker.sock:/var/run/docker.sock containrrr/watchtower --run-once open-webui
```
-You should have the Open WebUI up and running at http://localhost:8080/. Enjoy! 😄
+In the last part of the command, replace `open-webui` with your container name if it is different.
-## Troubleshooting
+### Moving from Ollama WebUI to Open WebUI
-See [TROUBLESHOOTING.md](/TROUBLESHOOTING.md) for information on how to troubleshoot and/or join our [Open WebUI Discord community](https://discord.gg/5rJgQTnV4s).
+Check our Migration Guide available in our [Open WebUI Documentation](https://docs.openwebui.com/migration/).
-## What's Next? 🚀
+## What's Next? 🌟
-### Roadmap 📝
-
-Here are some exciting tasks on our roadmap:
-
-- 🔊 **Local Text-to-Speech Integration**: Seamlessly incorporate text-to-speech functionality directly within the platform, allowing for a smoother and more immersive user experience.
-- 🛡️ **Granular Permissions and User Groups**: Empower administrators to finely control access levels and group users according to their roles and responsibilities. This feature ensures robust security measures and streamlined management of user privileges, enhancing overall platform functionality.
-- 🔄 **Function Calling**: Empower your interactions by running code directly within the chat. Execute functions and commands effortlessly, enhancing the functionality of your conversations.
-- ⚙️ **Custom Python Backend Actions**: Empower your Open WebUI by creating or downloading custom Python backend actions. Unleash the full potential of your web interface with tailored actions that suit your specific needs, enhancing functionality and versatility.
-- 🔧 **Fine-tune Model (LoRA)**: Fine-tune your model directly from the user interface. This feature allows for precise customization and optimization of the chat experience to better suit your needs and preferences.
-- 🧠 **Long-Term Memory**: Witness the power of persistent memory in our agents. Enjoy conversations that feel continuous as agents remember and reference past interactions, creating a more cohesive and personalized user experience.
-- 🧪 **Research-Centric Features**: Empower researchers in the fields of LLM and HCI with a comprehensive web UI for conducting user studies. Stay tuned for ongoing feature enhancements (e.g., surveys, analytics, and participant tracking) to facilitate their research.
-- 📈 **User Study Tools**: Providing specialized tools, like heat maps and behavior tracking modules, to empower researchers in capturing and analyzing user behavior patterns with precision and accuracy.
-- 📚 **Enhanced Documentation**: Elevate your setup and customization experience with improved, comprehensive documentation.
-
-Feel free to contribute and help us make Open WebUI even better! 🙌
+Discover upcoming features on our roadmap in the [Open WebUI Documentation](https://docs.openwebui.com/roadmap/).
## Supporters ✨
@@ -308,6 +170,16 @@ This project is licensed under the [MIT License](LICENSE) - see the [LICENSE](LI
If you have any questions, suggestions, or need assistance, please open an issue or join our
[Open WebUI Discord community](https://discord.gg/5rJgQTnV4s) to connect with us! 🤝
+## Star History
+
+
+
+
+
---
Created by [Timothy J. Baek](https://github.com/tjbck) - Let's make Open Web UI even more amazing together! 💪
diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
index d3163501..8e8f89da 100644
--- a/TROUBLESHOOTING.md
+++ b/TROUBLESHOOTING.md
@@ -4,7 +4,7 @@
The Open WebUI system is designed to streamline interactions between the client (your browser) and the Ollama API. At the heart of this design is a backend reverse proxy, enhancing security and resolving CORS issues.
-- **How it Works**: The Open WebUI is designed to interact with the Ollama API through a specific route. When a request is made from the WebUI to Ollama, it is not directly sent to the Ollama API. Initially, the request is sent to the Open WebUI backend via `/ollama/api` route. From there, the backend is responsible for forwarding the request to the Ollama API. This forwarding is accomplished by using the route specified in the `OLLAMA_API_BASE_URL` environment variable. Therefore, a request made to `/ollama/api` in the WebUI is effectively the same as making a request to `OLLAMA_API_BASE_URL` in the backend. For instance, a request to `/ollama/api/tags` in the WebUI is equivalent to `OLLAMA_API_BASE_URL/tags` in the backend.
+- **How it Works**: The Open WebUI is designed to interact with the Ollama API through a specific route. When a request is made from the WebUI to Ollama, it is not directly sent to the Ollama API. Initially, the request is sent to the Open WebUI backend via the `/ollama` route. From there, the backend is responsible for forwarding the request to the Ollama API. This forwarding is accomplished by using the URL specified in the `OLLAMA_BASE_URL` environment variable. Therefore, a request made to `/ollama` in the WebUI is effectively the same as making a request to `OLLAMA_BASE_URL` in the backend. For instance, a request to `/ollama/api/tags` in the WebUI is equivalent to `OLLAMA_BASE_URL/api/tags` in the backend.
- **Security Benefits**: This design prevents direct exposure of the Ollama API to the frontend, safeguarding against potential CORS (Cross-Origin Resource Sharing) issues and unauthorized access. Requiring authentication to access the Ollama API further enhances this security layer.
@@ -15,7 +15,7 @@ If you're experiencing connection issues, it’s often due to the WebUI docker c
**Example Docker Command**:
```bash
-docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_API_BASE_URL=http://127.0.0.1:11434/api --name open-webui --restart always ghcr.io/open-webui/open-webui:main
+docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_BASE_URL=http://127.0.0.1:11434 --name open-webui --restart always ghcr.io/open-webui/open-webui:main
```
### General Connection Errors
@@ -25,8 +25,8 @@ docker run -d --network=host -v open-webui:/app/backend/data -e OLLAMA_API_BASE_
**Troubleshooting Steps**:
1. **Verify Ollama URL Format**:
- - When running the Web UI container, ensure the `OLLAMA_API_BASE_URL` is correctly set, including the `/api` suffix. (e.g., `http://192.168.1.1:11434/api` for different host setups).
+ - When running the Web UI container, ensure the `OLLAMA_BASE_URL` is correctly set (e.g., `http://192.168.1.1:11434` for different host setups).
- In the Open WebUI, navigate to "Settings" > "General".
- - Confirm that the Ollama Server URL is correctly set to `[OLLAMA URL]/api` (e.g., `http://localhost:11434/api`), including the `/api` suffix.
+ - Confirm that the Ollama Server URL is correctly set to `[OLLAMA URL]` (e.g., `http://localhost:11434`).
By following these enhanced troubleshooting steps, connection issues should be effectively resolved. For further assistance or queries, feel free to reach out to us on our community Discord.
diff --git a/backend/.dockerignore b/backend/.dockerignore
index 11f9256f..97ab3283 100644
--- a/backend/.dockerignore
+++ b/backend/.dockerignore
@@ -4,4 +4,11 @@ _old
uploads
.ipynb_checkpoints
*.db
-_test
\ No newline at end of file
+_test
+!/data
+/data/*
+!/data/litellm
+/data/litellm/*
+!data/litellm/config.yaml
+
+!data/config.json
\ No newline at end of file
diff --git a/backend/.gitignore b/backend/.gitignore
index 4dd0b849..ea83b34f 100644
--- a/backend/.gitignore
+++ b/backend/.gitignore
@@ -6,5 +6,11 @@ uploads
*.db
_test
Pipfile
-data/*
+!/data
+/data/*
+!/data/litellm
+/data/litellm/*
+!data/litellm/config.yaml
+
+!data/config.json
.webui_secret_key
\ No newline at end of file
diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py
index 86e79c47..d8cb415f 100644
--- a/backend/apps/audio/main.py
+++ b/backend/apps/audio/main.py
@@ -56,7 +56,7 @@ def transcribe(
model = WhisperModel(
WHISPER_MODEL,
- device="cpu",
+ device="auto",
compute_type="int8",
download_root=WHISPER_MODEL_DIR,
)
diff --git a/backend/apps/images/main.py b/backend/apps/images/main.py
new file mode 100644
index 00000000..e14b0f6a
--- /dev/null
+++ b/backend/apps/images/main.py
@@ -0,0 +1,365 @@
+import re
+import requests
+from fastapi import (
+ FastAPI,
+ Request,
+ Depends,
+ HTTPException,
+ status,
+ UploadFile,
+ File,
+ Form,
+)
+from fastapi.middleware.cors import CORSMiddleware
+from faster_whisper import WhisperModel
+
+from constants import ERROR_MESSAGES
+from utils.utils import (
+ get_current_user,
+ get_admin_user,
+)
+from utils.misc import calculate_sha256
+from typing import Optional
+from pydantic import BaseModel
+from pathlib import Path
+import uuid
+import base64
+import json
+
+from config import CACHE_DIR, AUTOMATIC1111_BASE_URL
+
+
+IMAGE_CACHE_DIR = Path(CACHE_DIR).joinpath("./image/generations/")
+IMAGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+app = FastAPI()
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_credentials=True,
+ allow_methods=["*"],
+ allow_headers=["*"],
+)
+
+app.state.ENGINE = ""
+app.state.ENABLED = False
+
+app.state.OPENAI_API_KEY = ""
+app.state.MODEL = ""
+
+
+app.state.AUTOMATIC1111_BASE_URL = AUTOMATIC1111_BASE_URL
+
+app.state.IMAGE_SIZE = "512x512"
+app.state.IMAGE_STEPS = 50
+
+
+@app.get("/config")
+async def get_config(request: Request, user=Depends(get_admin_user)):
+ return {"engine": app.state.ENGINE, "enabled": app.state.ENABLED}
+
+
+class ConfigUpdateForm(BaseModel):
+ engine: str
+ enabled: bool
+
+
+@app.post("/config/update")
+async def update_config(form_data: ConfigUpdateForm, user=Depends(get_admin_user)):
+ app.state.ENGINE = form_data.engine
+ app.state.ENABLED = form_data.enabled
+ return {"engine": app.state.ENGINE, "enabled": app.state.ENABLED}
+
+
+class UrlUpdateForm(BaseModel):
+ url: str
+
+
+@app.get("/url")
+async def get_automatic1111_url(user=Depends(get_admin_user)):
+ return {"AUTOMATIC1111_BASE_URL": app.state.AUTOMATIC1111_BASE_URL}
+
+
+@app.post("/url/update")
+async def update_automatic1111_url(
+ form_data: UrlUpdateForm, user=Depends(get_admin_user)
+):
+
+ if form_data.url == "":
+ app.state.AUTOMATIC1111_BASE_URL = AUTOMATIC1111_BASE_URL
+ else:
+ url = form_data.url.strip("/")
+ try:
+ r = requests.head(url)
+ app.state.AUTOMATIC1111_BASE_URL = url
+ except Exception as e:
+ raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(e))
+
+ return {
+ "AUTOMATIC1111_BASE_URL": app.state.AUTOMATIC1111_BASE_URL,
+ "status": True,
+ }
+
+
+class OpenAIKeyUpdateForm(BaseModel):
+ key: str
+
+
+@app.get("/key")
+async def get_openai_key(user=Depends(get_admin_user)):
+ return {"OPENAI_API_KEY": app.state.OPENAI_API_KEY}
+
+
+@app.post("/key/update")
+async def update_openai_key(
+ form_data: OpenAIKeyUpdateForm, user=Depends(get_admin_user)
+):
+
+ if form_data.key == "":
+ raise HTTPException(status_code=400, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
+
+ app.state.OPENAI_API_KEY = form_data.key
+ return {
+ "OPENAI_API_KEY": app.state.OPENAI_API_KEY,
+ "status": True,
+ }
+
+
+class ImageSizeUpdateForm(BaseModel):
+ size: str
+
+
+@app.get("/size")
+async def get_image_size(user=Depends(get_admin_user)):
+ return {"IMAGE_SIZE": app.state.IMAGE_SIZE}
+
+
+@app.post("/size/update")
+async def update_image_size(
+ form_data: ImageSizeUpdateForm, user=Depends(get_admin_user)
+):
+ pattern = r"^\d+x\d+$" # Regular expression pattern
+ if re.match(pattern, form_data.size):
+ app.state.IMAGE_SIZE = form_data.size
+ return {
+ "IMAGE_SIZE": app.state.IMAGE_SIZE,
+ "status": True,
+ }
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.INCORRECT_FORMAT(" (e.g., 512x512)."),
+ )
+
+
+class ImageStepsUpdateForm(BaseModel):
+ steps: int
+
+
+@app.get("/steps")
+async def get_image_size(user=Depends(get_admin_user)):
+ return {"IMAGE_STEPS": app.state.IMAGE_STEPS}
+
+
+@app.post("/steps/update")
+async def update_image_size(
+ form_data: ImageStepsUpdateForm, user=Depends(get_admin_user)
+):
+ if form_data.steps >= 0:
+ app.state.IMAGE_STEPS = form_data.steps
+ return {
+ "IMAGE_STEPS": app.state.IMAGE_STEPS,
+ "status": True,
+ }
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.INCORRECT_FORMAT(" (e.g., 50)."),
+ )
+
+
+@app.get("/models")
+def get_models(user=Depends(get_current_user)):
+ try:
+ if app.state.ENGINE == "openai":
+ return [
+ {"id": "dall-e-2", "name": "DALL·E 2"},
+ {"id": "dall-e-3", "name": "DALL·E 3"},
+ ]
+ else:
+ r = requests.get(
+ url=f"{app.state.AUTOMATIC1111_BASE_URL}/sdapi/v1/sd-models"
+ )
+ models = r.json()
+ return list(
+ map(
+ lambda model: {"id": model["title"], "name": model["model_name"]},
+ models,
+ )
+ )
+ except Exception as e:
+ app.state.ENABLED = False
+ raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(e))
+
+
+@app.get("/models/default")
+async def get_default_model(user=Depends(get_admin_user)):
+ try:
+ if app.state.ENGINE == "openai":
+ return {"model": app.state.MODEL if app.state.MODEL else "dall-e-2"}
+ else:
+ r = requests.get(url=f"{app.state.AUTOMATIC1111_BASE_URL}/sdapi/v1/options")
+ options = r.json()
+ return {"model": options["sd_model_checkpoint"]}
+ except Exception as e:
+ app.state.ENABLED = False
+ raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(e))
+
+
+class UpdateModelForm(BaseModel):
+ model: str
+
+
+def set_model_handler(model: str):
+
+ if app.state.ENGINE == "openai":
+ app.state.MODEL = model
+ return app.state.MODEL
+ else:
+ r = requests.get(url=f"{app.state.AUTOMATIC1111_BASE_URL}/sdapi/v1/options")
+ options = r.json()
+
+ if model != options["sd_model_checkpoint"]:
+ options["sd_model_checkpoint"] = model
+ r = requests.post(
+ url=f"{app.state.AUTOMATIC1111_BASE_URL}/sdapi/v1/options", json=options
+ )
+
+ return options
+
+
+@app.post("/models/default/update")
+def update_default_model(
+ form_data: UpdateModelForm,
+ user=Depends(get_current_user),
+):
+ return set_model_handler(form_data.model)
+
+
+class GenerateImageForm(BaseModel):
+ model: Optional[str] = None
+ prompt: str
+ n: int = 1
+ size: Optional[str] = None
+ negative_prompt: Optional[str] = None
+
+
+def save_b64_image(b64_str):
+ image_id = str(uuid.uuid4())
+ file_path = IMAGE_CACHE_DIR.joinpath(f"{image_id}.png")
+
+ try:
+ # Decode the base64 string to get the raw image bytes
+ img_data = base64.b64decode(b64_str)
+
+ # Write the image data to a file
+ with open(file_path, "wb") as f:
+ f.write(img_data)
+
+ return image_id
+ except Exception as e:
+ print(f"Error saving image: {e}")
+ return None
+
+
+@app.post("/generations")
+def generate_image(
+ form_data: GenerateImageForm,
+ user=Depends(get_current_user),
+):
+
+ r = None
+ try:
+ if app.state.ENGINE == "openai":
+
+ headers = {}
+ headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
+ headers["Content-Type"] = "application/json"
+
+ data = {
+ "model": app.state.MODEL if app.state.MODEL != "" else "dall-e-2",
+ "prompt": form_data.prompt,
+ "n": form_data.n,
+ "size": form_data.size if form_data.size else app.state.IMAGE_SIZE,
+ "response_format": "b64_json",
+ }
+
+ r = requests.post(
+ url=f"https://api.openai.com/v1/images/generations",
+ json=data,
+ headers=headers,
+ )
+
+ r.raise_for_status()
+ res = r.json()
+
+ images = []
+
+ for image in res["data"]:
+ image_id = save_b64_image(image["b64_json"])
+ images.append({"url": f"/cache/image/generations/{image_id}.png"})
+ file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_id}.json")
+
+ with open(file_body_path, "w") as f:
+ json.dump(data, f)
+
+ return images
+
+ else:
+ if form_data.model:
+ set_model_handler(form_data.model)
+
+ width, height = tuple(map(int, app.state.IMAGE_SIZE.split("x")))
+
+ data = {
+ "prompt": form_data.prompt,
+ "batch_size": form_data.n,
+ "width": width,
+ "height": height,
+ }
+
+ if app.state.IMAGE_STEPS != None:
+ data["steps"] = app.state.IMAGE_STEPS
+
+ if form_data.negative_prompt != None:
+ data["negative_prompt"] = form_data.negative_prompt
+
+ r = requests.post(
+ url=f"{app.state.AUTOMATIC1111_BASE_URL}/sdapi/v1/txt2img",
+ json=data,
+ )
+
+ res = r.json()
+
+ print(res)
+
+ images = []
+
+ for image in res["images"]:
+ image_id = save_b64_image(image)
+ images.append({"url": f"/cache/image/generations/{image_id}.png"})
+ file_body_path = IMAGE_CACHE_DIR.joinpath(f"{image_id}.json")
+
+ with open(file_body_path, "w") as f:
+ json.dump({**data, "info": res["info"]}, f)
+
+ return images
+
+ except Exception as e:
+ error = e
+
+ if r != None:
+ data = r.json()
+ if "error" in data:
+ error = data["error"]["message"]
+ raise HTTPException(status_code=400, detail=ERROR_MESSAGES.DEFAULT(error))
diff --git a/backend/apps/litellm/main.py b/backend/apps/litellm/main.py
new file mode 100644
index 00000000..838b4707
--- /dev/null
+++ b/backend/apps/litellm/main.py
@@ -0,0 +1,95 @@
+from litellm.proxy.proxy_server import ProxyConfig, initialize
+from litellm.proxy.proxy_server import app
+
+from fastapi import FastAPI, Request, Depends, status, Response
+from fastapi.responses import JSONResponse
+
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
+from starlette.responses import StreamingResponse
+import json
+
+from utils.utils import get_http_authorization_cred, get_current_user
+from config import ENV
+
+
+from config import (
+ MODEL_FILTER_ENABLED,
+ MODEL_FILTER_LIST,
+)
+
+
+proxy_config = ProxyConfig()
+
+
+async def config():
+ router, model_list, general_settings = await proxy_config.load_config(
+ router=None, config_file_path="./data/litellm/config.yaml"
+ )
+
+ await initialize(config="./data/litellm/config.yaml", telemetry=False)
+
+
+async def startup():
+ await config()
+
+
+@app.on_event("startup")
+async def on_startup():
+ await startup()
+
+
+app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED
+app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
+
+
+@app.middleware("http")
+async def auth_middleware(request: Request, call_next):
+ auth_header = request.headers.get("Authorization", "")
+ request.state.user = None
+
+ try:
+ user = get_current_user(get_http_authorization_cred(auth_header))
+ print(user)
+ request.state.user = user
+ except Exception as e:
+ return JSONResponse(status_code=400, content={"detail": str(e)})
+
+ response = await call_next(request)
+ return response
+
+
+class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
+ async def dispatch(
+ self, request: Request, call_next: RequestResponseEndpoint
+ ) -> Response:
+
+ response = await call_next(request)
+ user = request.state.user
+
+ if "/models" in request.url.path:
+ if isinstance(response, StreamingResponse):
+ # Read the content of the streaming response
+ body = b""
+ async for chunk in response.body_iterator:
+ body += chunk
+
+ data = json.loads(body.decode("utf-8"))
+
+ if app.state.MODEL_FILTER_ENABLED:
+ if user and user.role == "user":
+ data["data"] = list(
+ filter(
+ lambda model: model["id"]
+ in app.state.MODEL_FILTER_LIST,
+ data["data"],
+ )
+ )
+
+ # Modified Flag
+ data["modified"] = True
+ return JSONResponse(content=data)
+
+ return response
+
+
+app.add_middleware(ModifyModelsResponseMiddleware)
diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py
index bc797f08..6f56f3cf 100644
--- a/backend/apps/ollama/main.py
+++ b/backend/apps/ollama/main.py
@@ -3,15 +3,22 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.concurrency import run_in_threadpool
+from pydantic import BaseModel, ConfigDict
+
+import random
import requests
import json
import uuid
-from pydantic import BaseModel
+import aiohttp
+import asyncio
from apps.web.models.users import Users
from constants import ERROR_MESSAGES
from utils.utils import decode_token, get_current_user, get_admin_user
-from config import OLLAMA_API_BASE_URL, WEBUI_AUTH
+from config import OLLAMA_BASE_URLS, MODEL_FILTER_ENABLED, MODEL_FILTER_LIST
+
+from typing import Optional, List, Union
+
app = FastAPI()
app.add_middleware(
@@ -22,27 +29,48 @@ app.add_middleware(
allow_headers=["*"],
)
-app.state.OLLAMA_API_BASE_URL = OLLAMA_API_BASE_URL
-# TARGET_SERVER_URL = OLLAMA_API_BASE_URL
+app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED
+app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
+
+app.state.OLLAMA_BASE_URLS = OLLAMA_BASE_URLS
+app.state.MODELS = {}
REQUEST_POOL = []
-@app.get("/url")
-async def get_ollama_api_url(user=Depends(get_admin_user)):
- return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
+# TODO: Implement a more intelligent load balancing mechanism for distributing requests among multiple backend instances.
+# Current implementation uses simple random selection (random.choice), not round-robin. Consider incorporating algorithms like weighted round-robin,
+# least connections, or least response time for better resource utilization and performance optimization.
+
+
+@app.middleware("http")
+async def check_url(request: Request, call_next):
+ if len(app.state.MODELS) == 0:
+ await get_all_models()
+ else:
+ pass
+
+ response = await call_next(request)
+ return response
+
+
+@app.get("/urls")
+async def get_ollama_api_urls(user=Depends(get_admin_user)):
+ return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}
class UrlUpdateForm(BaseModel):
- url: str
+ urls: List[str]
-@app.post("/url/update")
+@app.post("/urls/update")
async def update_ollama_api_url(form_data: UrlUpdateForm, user=Depends(get_admin_user)):
- app.state.OLLAMA_API_BASE_URL = form_data.url
- return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
+ app.state.OLLAMA_BASE_URLS = form_data.urls
+
+ print(app.state.OLLAMA_BASE_URLS)
+ return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}
@app.get("/cancel/{request_id}")
@@ -55,9 +83,824 @@ async def cancel_ollama_request(request_id: str, user=Depends(get_current_user))
raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
+async def fetch_url(url):
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.get(url) as response:
+ return await response.json()
+ except Exception as e:
+ # Handle connection error here
+ print(f"Connection error: {e}")
+ return None
+
+
+def merge_models_lists(model_lists):
+ merged_models = {}
+
+ for idx, model_list in enumerate(model_lists):
+ if model_list is not None:
+ for model in model_list:
+ digest = model["digest"]
+ if digest not in merged_models:
+ model["urls"] = [idx]
+ merged_models[digest] = model
+ else:
+ merged_models[digest]["urls"].append(idx)
+
+ return list(merged_models.values())
+
+
+# user=Depends(get_current_user)
+
+
+async def get_all_models():
+ print("get_all_models")
+ tasks = [fetch_url(f"{url}/api/tags") for url in app.state.OLLAMA_BASE_URLS]
+ responses = await asyncio.gather(*tasks)
+
+ models = {
+ "models": merge_models_lists(
+ map(lambda response: response["models"] if response else None, responses)
+ )
+ }
+
+ app.state.MODELS = {model["model"]: model for model in models["models"]}
+
+ return models
+
+
+@app.get("/api/tags")
+@app.get("/api/tags/{url_idx}")
+async def get_ollama_tags(
+ url_idx: Optional[int] = None, user=Depends(get_current_user)
+):
+ if url_idx == None:
+ models = await get_all_models()
+
+ if app.state.MODEL_FILTER_ENABLED:
+ if user.role == "user":
+ models["models"] = list(
+ filter(
+ lambda model: model["name"] in app.state.MODEL_FILTER_LIST,
+ models["models"],
+ )
+ )
+ return models
+ return models
+ else:
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ try:
+ r = requests.request(method="GET", url=f"{url}/api/tags")
+ r.raise_for_status()
+
+ return r.json()
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+@app.get("/api/version")
+@app.get("/api/version/{url_idx}")
+async def get_ollama_versions(url_idx: Optional[int] = None):
+
+ if url_idx == None:
+
+ # returns lowest version
+ tasks = [fetch_url(f"{url}/api/version") for url in app.state.OLLAMA_BASE_URLS]
+ responses = await asyncio.gather(*tasks)
+ responses = list(filter(lambda x: x is not None, responses))
+
+ if len(responses) > 0:
+ lowest_version = min(
+ responses, key=lambda x: tuple(map(int, x["version"].split(".")))
+ )
+
+ return {"version": lowest_version["version"]}
+ else:
+ raise HTTPException(
+ status_code=500,
+ detail=ERROR_MESSAGES.OLLAMA_NOT_FOUND,
+ )
+ else:
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ try:
+ r = requests.request(method="GET", url=f"{url}/api/version")
+ r.raise_for_status()
+
+ return r.json()
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class ModelNameForm(BaseModel):
+ name: str
+
+
+@app.post("/api/pull")
+@app.post("/api/pull/{url_idx}")
+async def pull_model(
+ form_data: ModelNameForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ def get_request():
+ nonlocal url
+ nonlocal r
+ try:
+
+ def stream_content():
+ for chunk in r.iter_content(chunk_size=8192):
+ yield chunk
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/pull",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ stream=True,
+ )
+
+ r.raise_for_status()
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class PushModelForm(BaseModel):
+ name: str
+ insecure: Optional[bool] = None
+ stream: Optional[bool] = None
+
+
+@app.delete("/api/push")
+@app.delete("/api/push/{url_idx}")
+async def push_model(
+ form_data: PushModelForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_admin_user),
+):
+ if url_idx == None:
+ if form_data.name in app.state.MODELS:
+ url_idx = app.state.MODELS[form_data.name]["urls"][0]
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ def get_request():
+ nonlocal url
+ nonlocal r
+ try:
+
+ def stream_content():
+ for chunk in r.iter_content(chunk_size=8192):
+ yield chunk
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/push",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+
+ r.raise_for_status()
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class CreateModelForm(BaseModel):
+ name: str
+ modelfile: Optional[str] = None
+ stream: Optional[bool] = None
+ path: Optional[str] = None
+
+
+@app.post("/api/create")
+@app.post("/api/create/{url_idx}")
+async def create_model(
+ form_data: CreateModelForm, url_idx: int = 0, user=Depends(get_admin_user)
+):
+ print(form_data)
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ def get_request():
+ nonlocal url
+ nonlocal r
+ try:
+
+ def stream_content():
+ for chunk in r.iter_content(chunk_size=8192):
+ yield chunk
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/create",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ stream=True,
+ )
+
+ r.raise_for_status()
+
+ print(r)
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class CopyModelForm(BaseModel):
+ source: str
+ destination: str
+
+
+@app.post("/api/copy")
+@app.post("/api/copy/{url_idx}")
+async def copy_model(
+ form_data: CopyModelForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_admin_user),
+):
+ if url_idx == None:
+ if form_data.source in app.state.MODELS:
+ url_idx = app.state.MODELS[form_data.source]["urls"][0]
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.source),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ try:
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/copy",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+ r.raise_for_status()
+
+ print(r.text)
+
+ return True
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+@app.delete("/api/delete")
+@app.delete("/api/delete/{url_idx}")
+async def delete_model(
+ form_data: ModelNameForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_admin_user),
+):
+ if url_idx == None:
+ if form_data.name in app.state.MODELS:
+ url_idx = app.state.MODELS[form_data.name]["urls"][0]
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ try:
+ r = requests.request(
+ method="DELETE",
+ url=f"{url}/api/delete",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+ r.raise_for_status()
+
+ print(r.text)
+
+ return True
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+@app.post("/api/show")
+async def show_model_info(form_data: ModelNameForm, user=Depends(get_current_user)):
+ if form_data.name not in app.state.MODELS:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.name),
+ )
+
+ url_idx = random.choice(app.state.MODELS[form_data.name]["urls"])
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ try:
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/show",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+ r.raise_for_status()
+
+ return r.json()
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class GenerateEmbeddingsForm(BaseModel):
+ model: str
+ prompt: str
+ options: Optional[dict] = None
+ keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/embeddings")
+@app.post("/api/embeddings/{url_idx}")
+async def generate_embeddings(
+ form_data: GenerateEmbeddingsForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_current_user),
+):
+ if url_idx == None:
+ if form_data.model in app.state.MODELS:
+ url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ try:
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/embeddings",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ )
+ r.raise_for_status()
+
+ return r.json()
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class GenerateCompletionForm(BaseModel):
+ model: str
+ prompt: str
+ images: Optional[List[str]] = None
+ format: Optional[str] = None
+ options: Optional[dict] = None
+ system: Optional[str] = None
+ template: Optional[str] = None
+ context: Optional[str] = None
+ stream: Optional[bool] = True
+ raw: Optional[bool] = None
+ keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/generate")
+@app.post("/api/generate/{url_idx}")
+async def generate_completion(
+ form_data: GenerateCompletionForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_current_user),
+):
+
+ if url_idx == None:
+ if form_data.model in app.state.MODELS:
+ url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail="error_detail",
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ def get_request():
+ nonlocal form_data
+ nonlocal r
+
+ request_id = str(uuid.uuid4())
+ try:
+ REQUEST_POOL.append(request_id)
+
+ def stream_content():
+ try:
+ if form_data.stream:
+ yield json.dumps({"id": request_id, "done": False}) + "\n"
+
+ for chunk in r.iter_content(chunk_size=8192):
+ if request_id in REQUEST_POOL:
+ yield chunk
+ else:
+ print("User: canceled request")
+ break
+ finally:
+ if hasattr(r, "close"):
+ r.close()
+ if request_id in REQUEST_POOL:
+ REQUEST_POOL.remove(request_id)
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/generate",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ stream=True,
+ )
+
+ r.raise_for_status()
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+class ChatMessage(BaseModel):
+ role: str
+ content: str
+ images: Optional[List[str]] = None
+
+
+class GenerateChatCompletionForm(BaseModel):
+ model: str
+ messages: List[ChatMessage]
+ format: Optional[str] = None
+ options: Optional[dict] = None
+ template: Optional[str] = None
+ stream: Optional[bool] = None
+ keep_alive: Optional[Union[int, str]] = None
+
+
+@app.post("/api/chat")
+@app.post("/api/chat/{url_idx}")
+async def generate_chat_completion(
+ form_data: GenerateChatCompletionForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_current_user),
+):
+
+ if url_idx == None:
+ if form_data.model in app.state.MODELS:
+ url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ print(form_data.model_dump_json(exclude_none=True).encode())
+
+ def get_request():
+ nonlocal form_data
+ nonlocal r
+
+ request_id = str(uuid.uuid4())
+ try:
+ REQUEST_POOL.append(request_id)
+
+ def stream_content():
+ try:
+ if form_data.stream:
+ yield json.dumps({"id": request_id, "done": False}) + "\n"
+
+ for chunk in r.iter_content(chunk_size=8192):
+ if request_id in REQUEST_POOL:
+ yield chunk
+ else:
+ print("User: canceled request")
+ break
+ finally:
+ if hasattr(r, "close"):
+ r.close()
+ if request_id in REQUEST_POOL:
+ REQUEST_POOL.remove(request_id)
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/api/chat",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ stream=True,
+ )
+
+ r.raise_for_status()
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ print(e)
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
+# TODO: we should update this part once Ollama supports other types
+class OpenAIChatMessage(BaseModel):
+ role: str
+ content: str
+
+ model_config = ConfigDict(extra="allow")
+
+
+class OpenAIChatCompletionForm(BaseModel):
+ model: str
+ messages: List[OpenAIChatMessage]
+
+ model_config = ConfigDict(extra="allow")
+
+
+@app.post("/v1/chat/completions")
+@app.post("/v1/chat/completions/{url_idx}")
+async def generate_openai_chat_completion(
+ form_data: OpenAIChatCompletionForm,
+ url_idx: Optional[int] = None,
+ user=Depends(get_current_user),
+):
+
+ if url_idx == None:
+ if form_data.model in app.state.MODELS:
+ url_idx = random.choice(app.state.MODELS[form_data.model]["urls"])
+ else:
+ raise HTTPException(
+ status_code=400,
+ detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
+ )
+
+ url = app.state.OLLAMA_BASE_URLS[url_idx]
+ print(url)
+
+ r = None
+
+ def get_request():
+ nonlocal form_data
+ nonlocal r
+
+ request_id = str(uuid.uuid4())
+ try:
+ REQUEST_POOL.append(request_id)
+
+ def stream_content():
+ try:
+ if form_data.stream:
+ yield json.dumps(
+ {"request_id": request_id, "done": False}
+ ) + "\n"
+
+ for chunk in r.iter_content(chunk_size=8192):
+ if request_id in REQUEST_POOL:
+ yield chunk
+ else:
+ print("User: canceled request")
+ break
+ finally:
+ if hasattr(r, "close"):
+ r.close()
+ if request_id in REQUEST_POOL:
+ REQUEST_POOL.remove(request_id)
+
+ r = requests.request(
+ method="POST",
+ url=f"{url}/v1/chat/completions",
+ data=form_data.model_dump_json(exclude_none=True).encode(),
+ stream=True,
+ )
+
+ r.raise_for_status()
+
+ return StreamingResponse(
+ stream_content(),
+ status_code=r.status_code,
+ headers=dict(r.headers),
+ )
+ except Exception as e:
+ raise e
+
+ try:
+ return await run_in_threadpool(get_request)
+ except Exception as e:
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"Ollama: {res['error']}"
+ except:
+ error_detail = f"Ollama: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
+
+
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_current_user)):
- target_url = f"{app.state.OLLAMA_API_BASE_URL}/{path}"
+async def deprecated_proxy(path: str, request: Request, user=Depends(get_current_user)):
+ url = app.state.OLLAMA_BASE_URLS[0]
+ target_url = f"{url}/{path}"
body = await request.body()
headers = dict(request.headers)
@@ -91,7 +934,13 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
def stream_content():
try:
- if path in ["chat"]:
+ if path == "generate":
+ data = json.loads(body.decode("utf-8"))
+
+ if not ("stream" in data and data["stream"] == False):
+ yield json.dumps({"id": request_id, "done": False}) + "\n"
+
+ elif path == "chat":
yield json.dumps({"id": request_id, "done": False}) + "\n"
for chunk in r.iter_content(chunk_size=8192):
@@ -103,7 +952,8 @@ async def proxy(path: str, request: Request, user=Depends(get_current_user)):
finally:
if hasattr(r, "close"):
r.close()
- REQUEST_POOL.remove(request_id)
+ if request_id in REQUEST_POOL:
+ REQUEST_POOL.remove(request_id)
r = requests.request(
method=request.method,
diff --git a/backend/apps/ollama/old_main.py b/backend/apps/ollama/old_main.py
deleted file mode 100644
index 5e5b8811..00000000
--- a/backend/apps/ollama/old_main.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from fastapi import FastAPI, Request, Response, HTTPException, Depends
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import StreamingResponse
-
-import requests
-import json
-from pydantic import BaseModel
-
-from apps.web.models.users import Users
-from constants import ERROR_MESSAGES
-from utils.utils import decode_token, get_current_user
-from config import OLLAMA_API_BASE_URL, WEBUI_AUTH
-
-import aiohttp
-
-app = FastAPI()
-app.add_middleware(
- CORSMiddleware,
- allow_origins=["*"],
- allow_credentials=True,
- allow_methods=["*"],
- allow_headers=["*"],
-)
-
-app.state.OLLAMA_API_BASE_URL = OLLAMA_API_BASE_URL
-
-# TARGET_SERVER_URL = OLLAMA_API_BASE_URL
-
-
-@app.get("/url")
-async def get_ollama_api_url(user=Depends(get_current_user)):
- if user and user.role == "admin":
- return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
- else:
- raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
-
-class UrlUpdateForm(BaseModel):
- url: str
-
-
-@app.post("/url/update")
-async def update_ollama_api_url(
- form_data: UrlUpdateForm, user=Depends(get_current_user)
-):
- if user and user.role == "admin":
- app.state.OLLAMA_API_BASE_URL = form_data.url
- return {"OLLAMA_API_BASE_URL": app.state.OLLAMA_API_BASE_URL}
- else:
- raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
-
-# async def fetch_sse(method, target_url, body, headers):
-# async with aiohttp.ClientSession() as session:
-# try:
-# async with session.request(
-# method, target_url, data=body, headers=headers
-# ) as response:
-# print(response.status)
-# async for line in response.content:
-# yield line
-# except Exception as e:
-# print(e)
-# error_detail = "Open WebUI: Server Connection Error"
-# yield json.dumps({"error": error_detail, "message": str(e)}).encode()
-
-
-@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request, user=Depends(get_current_user)):
- target_url = f"{app.state.OLLAMA_API_BASE_URL}/{path}"
- print(target_url)
-
- body = await request.body()
- headers = dict(request.headers)
-
- if user.role in ["user", "admin"]:
- if path in ["pull", "delete", "push", "copy", "create"]:
- if user.role != "admin":
- raise HTTPException(
- status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED
- )
- else:
- raise HTTPException(status_code=401, detail=ERROR_MESSAGES.ACCESS_PROHIBITED)
-
- headers.pop("Host", None)
- headers.pop("Authorization", None)
- headers.pop("Origin", None)
- headers.pop("Referer", None)
-
- session = aiohttp.ClientSession()
- response = None
- try:
- response = await session.request(
- request.method, target_url, data=body, headers=headers
- )
-
- print(response)
- if not response.ok:
- data = await response.json()
- print(data)
- response.raise_for_status()
-
- async def generate():
- async for line in response.content:
- print(line)
- yield line
- await session.close()
-
- return StreamingResponse(generate(), response.status)
-
- except Exception as e:
- print(e)
- error_detail = "Open WebUI: Server Connection Error"
-
- if response is not None:
- try:
- res = await response.json()
- if "error" in res:
- error_detail = f"Ollama: {res['error']}"
- except:
- error_detail = f"Ollama: {e}"
-
- await session.close()
- raise HTTPException(
- status_code=response.status if response else 500,
- detail=error_detail,
- )
diff --git a/backend/apps/openai/main.py b/backend/apps/openai/main.py
index 36326430..67a99794 100644
--- a/backend/apps/openai/main.py
+++ b/backend/apps/openai/main.py
@@ -3,7 +3,10 @@ from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, JSONResponse, FileResponse
import requests
+import aiohttp
+import asyncio
import json
+
from pydantic import BaseModel
@@ -15,7 +18,15 @@ from utils.utils import (
get_verified_user,
get_admin_user,
)
-from config import OPENAI_API_BASE_URL, OPENAI_API_KEY, CACHE_DIR
+from config import (
+ OPENAI_API_BASE_URLS,
+ OPENAI_API_KEYS,
+ CACHE_DIR,
+ MODEL_FILTER_ENABLED,
+ MODEL_FILTER_LIST,
+)
+from typing import List, Optional
+
import hashlib
from pathlib import Path
@@ -29,116 +40,241 @@ app.add_middleware(
allow_headers=["*"],
)
-app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL
-app.state.OPENAI_API_KEY = OPENAI_API_KEY
+app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED
+app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
+
+app.state.OPENAI_API_BASE_URLS = OPENAI_API_BASE_URLS
+app.state.OPENAI_API_KEYS = OPENAI_API_KEYS
+
+app.state.MODELS = {}
-class UrlUpdateForm(BaseModel):
- url: str
+@app.middleware("http")
+async def check_url(request: Request, call_next):
+ if len(app.state.MODELS) == 0:
+ await get_all_models()
+ else:
+ pass
+
+ response = await call_next(request)
+ return response
-class KeyUpdateForm(BaseModel):
- key: str
+class UrlsUpdateForm(BaseModel):
+ urls: List[str]
-@app.get("/url")
-async def get_openai_url(user=Depends(get_admin_user)):
- return {"OPENAI_API_BASE_URL": app.state.OPENAI_API_BASE_URL}
+class KeysUpdateForm(BaseModel):
+ keys: List[str]
-@app.post("/url/update")
-async def update_openai_url(form_data: UrlUpdateForm, user=Depends(get_admin_user)):
- app.state.OPENAI_API_BASE_URL = form_data.url
- return {"OPENAI_API_BASE_URL": app.state.OPENAI_API_BASE_URL}
+@app.get("/urls")
+async def get_openai_urls(user=Depends(get_admin_user)):
+ return {"OPENAI_API_BASE_URLS": app.state.OPENAI_API_BASE_URLS}
-@app.get("/key")
-async def get_openai_key(user=Depends(get_admin_user)):
- return {"OPENAI_API_KEY": app.state.OPENAI_API_KEY}
+@app.post("/urls/update")
+async def update_openai_urls(form_data: UrlsUpdateForm, user=Depends(get_admin_user)):
+ app.state.OPENAI_API_BASE_URLS = form_data.urls
+ return {"OPENAI_API_BASE_URLS": app.state.OPENAI_API_BASE_URLS}
-@app.post("/key/update")
-async def update_openai_key(form_data: KeyUpdateForm, user=Depends(get_admin_user)):
- app.state.OPENAI_API_KEY = form_data.key
- return {"OPENAI_API_KEY": app.state.OPENAI_API_KEY}
+@app.get("/keys")
+async def get_openai_keys(user=Depends(get_admin_user)):
+ return {"OPENAI_API_KEYS": app.state.OPENAI_API_KEYS}
+
+
+@app.post("/keys/update")
+async def update_openai_key(form_data: KeysUpdateForm, user=Depends(get_admin_user)):
+ app.state.OPENAI_API_KEYS = form_data.keys
+ return {"OPENAI_API_KEYS": app.state.OPENAI_API_KEYS}
@app.post("/audio/speech")
async def speech(request: Request, user=Depends(get_verified_user)):
- target_url = f"{app.state.OPENAI_API_BASE_URL}/audio/speech"
-
- if app.state.OPENAI_API_KEY == "":
- raise HTTPException(status_code=401, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
-
- body = await request.body()
-
- name = hashlib.sha256(body).hexdigest()
-
- SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
- SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
- file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
- file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
-
- # Check if the file already exists in the cache
- if file_path.is_file():
- return FileResponse(file_path)
-
- headers = {}
- headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
- headers["Content-Type"] = "application/json"
-
+ idx = None
try:
- print("openai")
- r = requests.post(
- url=target_url,
- data=body,
- headers=headers,
- stream=True,
- )
+ idx = app.state.OPENAI_API_BASE_URLS.index("https://api.openai.com/v1")
+ body = await request.body()
+ name = hashlib.sha256(body).hexdigest()
- r.raise_for_status()
+ SPEECH_CACHE_DIR = Path(CACHE_DIR).joinpath("./audio/speech/")
+ SPEECH_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+ file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
+ file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")
- # Save the streaming content to a file
- with open(file_path, "wb") as f:
- for chunk in r.iter_content(chunk_size=8192):
- f.write(chunk)
+ # Check if the file already exists in the cache
+ if file_path.is_file():
+ return FileResponse(file_path)
- with open(file_body_path, "w") as f:
- json.dump(json.loads(body.decode("utf-8")), f)
+ headers = {}
+ headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEYS[idx]}"
+ headers["Content-Type"] = "application/json"
- # Return the saved file
- return FileResponse(file_path)
+ r = None
+ try:
+ r = requests.post(
+ url=f"{app.state.OPENAI_API_BASE_URLS[idx]}/audio/speech",
+ data=body,
+ headers=headers,
+ stream=True,
+ )
+ r.raise_for_status()
+
+ # Save the streaming content to a file
+ with open(file_path, "wb") as f:
+ for chunk in r.iter_content(chunk_size=8192):
+ f.write(chunk)
+
+ with open(file_body_path, "w") as f:
+ json.dump(json.loads(body.decode("utf-8")), f)
+
+ # Return the saved file
+ return FileResponse(file_path)
+
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"External: {res['error']}"
+ except:
+ error_detail = f"External: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500, detail=error_detail
+ )
+
+ except ValueError:
+ raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)
+
+
+async def fetch_url(url, key):
+ try:
+ headers = {"Authorization": f"Bearer {key}"}
+ async with aiohttp.ClientSession() as session:
+ async with session.get(url, headers=headers) as response:
+ return await response.json()
except Exception as e:
- print(e)
- error_detail = "Open WebUI: Server Connection Error"
- if r is not None:
- try:
- res = r.json()
- if "error" in res:
- error_detail = f"External: {res['error']}"
- except:
- error_detail = f"External: {e}"
+ # Handle connection error here
+ print(f"Connection error: {e}")
+ return None
- raise HTTPException(status_code=r.status_code, detail=error_detail)
+
+def merge_models_lists(model_lists):
+ merged_list = []
+
+ for idx, models in enumerate(model_lists):
+ if models is not None and "error" not in models:
+ merged_list.extend(
+ [
+ {**model, "urlIdx": idx}
+ for model in models
+ if "api.openai.com" not in app.state.OPENAI_API_BASE_URLS[idx]
+ or "gpt" in model["id"]
+ ]
+ )
+
+ return merged_list
+
+
+async def get_all_models():
+ print("get_all_models")
+
+ if len(app.state.OPENAI_API_KEYS) == 1 and app.state.OPENAI_API_KEYS[0] == "":
+ models = {"data": []}
+ else:
+ tasks = [
+ fetch_url(f"{url}/models", app.state.OPENAI_API_KEYS[idx])
+ for idx, url in enumerate(app.state.OPENAI_API_BASE_URLS)
+ ]
+
+ responses = await asyncio.gather(*tasks)
+ models = {
+ "data": merge_models_lists(
+ list(
+ map(
+ lambda response: (
+ response["data"]
+ if response and "data" in response
+ else None
+ ),
+ responses,
+ )
+ )
+ )
+ }
+
+ print(models)
+ app.state.MODELS = {model["id"]: model for model in models["data"]}
+
+ return models
+
+
+@app.get("/models")
+@app.get("/models/{url_idx}")
+async def get_models(url_idx: Optional[int] = None, user=Depends(get_current_user)):
+ if url_idx == None:
+ models = await get_all_models()
+ if app.state.MODEL_FILTER_ENABLED:
+ if user.role == "user":
+ models["data"] = list(
+ filter(
+ lambda model: model["id"] in app.state.MODEL_FILTER_LIST,
+ models["data"],
+ )
+ )
+ return models
+ return models
+ else:
+ url = app.state.OPENAI_API_BASE_URLS[url_idx]
+
+ r = None
+
+ try:
+ r = requests.request(method="GET", url=f"{url}/models")
+ r.raise_for_status()
+
+ response_data = r.json()
+ if "api.openai.com" in url:
+ response_data["data"] = list(
+ filter(lambda model: "gpt" in model["id"], response_data["data"])
+ )
+
+ return response_data
+ except Exception as e:
+ print(e)
+ error_detail = "Open WebUI: Server Connection Error"
+ if r is not None:
+ try:
+ res = r.json()
+ if "error" in res:
+ error_detail = f"External: {res['error']}"
+ except:
+ error_detail = f"External: {e}"
+
+ raise HTTPException(
+ status_code=r.status_code if r else 500,
+ detail=error_detail,
+ )
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
- target_url = f"{app.state.OPENAI_API_BASE_URL}/{path}"
- print(target_url, app.state.OPENAI_API_KEY)
-
- if app.state.OPENAI_API_KEY == "":
- raise HTTPException(status_code=401, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
+ idx = 0
body = await request.body()
-
# TODO: Remove below after gpt-4-vision fix from Open AI
# Try to decode the body of the request from bytes to a UTF-8 string (Require add max_token to fix gpt-4-vision)
try:
body = body.decode("utf-8")
body = json.loads(body)
+ idx = app.state.MODELS[body.get("model")]["urlIdx"]
+
# Check if the model is "gpt-4-vision-preview" and set "max_tokens" to 4000
# This is a workaround until OpenAI fixes the issue with this model
if body.get("model") == "gpt-4-vision-preview":
@@ -146,15 +282,32 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
body["max_tokens"] = 4000
print("Modified body_dict:", body)
+ # Fix for ChatGPT calls failing because the num_ctx key is in body
+ if "num_ctx" in body:
+ # If 'num_ctx' is in the dictionary, delete it
+ # Leaving it there generates an error with the
+ # OpenAI API (Feb 2024)
+ del body["num_ctx"]
+
# Convert the modified body back to JSON
body = json.dumps(body)
except json.JSONDecodeError as e:
print("Error loading request body into a dictionary:", e)
+ url = app.state.OPENAI_API_BASE_URLS[idx]
+ key = app.state.OPENAI_API_KEYS[idx]
+
+ target_url = f"{url}/{path}"
+
+ if key == "":
+ raise HTTPException(status_code=401, detail=ERROR_MESSAGES.API_KEY_NOT_FOUND)
+
headers = {}
- headers["Authorization"] = f"Bearer {app.state.OPENAI_API_KEY}"
+ headers["Authorization"] = f"Bearer {key}"
headers["Content-Type"] = "application/json"
+ r = None
+
try:
r = requests.request(
method=request.method,
@@ -174,21 +327,7 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
headers=dict(r.headers),
)
else:
- # For non-SSE, read the response and return it
- # response_data = (
- # r.json()
- # if r.headers.get("Content-Type", "")
- # == "application/json"
- # else r.text
- # )
-
response_data = r.json()
-
- if "openai" in app.state.OPENAI_API_BASE_URL and path == "models":
- response_data["data"] = list(
- filter(lambda model: "gpt" in model["id"], response_data["data"])
- )
-
return response_data
except Exception as e:
print(e)
@@ -201,4 +340,6 @@ async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
except:
error_detail = f"External: {e}"
- raise HTTPException(status_code=r.status_code, detail=error_detail)
+ raise HTTPException(
+ status_code=r.status_code if r else 500, detail=error_detail
+ )
diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index 07a30ade..5fc38b4a 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -1,6 +1,5 @@
from fastapi import (
FastAPI,
- Request,
Depends,
HTTPException,
status,
@@ -10,9 +9,12 @@ from fastapi import (
)
from fastapi.middleware.cors import CORSMiddleware
import os, shutil
+
+from pathlib import Path
from typing import List
-# from chromadb.utils import embedding_functions
+from sentence_transformers import SentenceTransformer
+from chromadb.utils import embedding_functions
from langchain_community.document_loaders import (
WebBaseLoader,
@@ -28,27 +30,68 @@ from langchain_community.document_loaders import (
UnstructuredExcelLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain.chains import RetrievalQA
-
from pydantic import BaseModel
from typing import Optional
-
+import mimetypes
import uuid
-import time
+import json
-from utils.misc import calculate_sha256, calculate_sha256_string
+
+from apps.web.models.documents import (
+ Documents,
+ DocumentForm,
+ DocumentResponse,
+)
+
+from apps.rag.utils import query_doc, query_collection
+
+from utils.misc import (
+ calculate_sha256,
+ calculate_sha256_string,
+ sanitize_filename,
+ extract_folders_after_data_docs,
+)
from utils.utils import get_current_user, get_admin_user
-from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP
+from config import (
+ UPLOAD_DIR,
+ DOCS_DIR,
+ RAG_EMBEDDING_MODEL,
+ RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+ CHROMA_CLIENT,
+ CHUNK_SIZE,
+ CHUNK_OVERLAP,
+ RAG_TEMPLATE,
+)
+
from constants import ERROR_MESSAGES
-# EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction(
-# model_name=EMBED_MODEL
-# )
+#
+# if RAG_EMBEDDING_MODEL:
+# sentence_transformer_ef = SentenceTransformer(
+# model_name_or_path=RAG_EMBEDDING_MODEL,
+# cache_folder=RAG_EMBEDDING_MODEL_DIR,
+# device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+# )
+
app = FastAPI()
+# RAG settings kept on app.state so the update endpoints can change them at
+# runtime; most are seeded from the values imported from config.
+app.state.PDF_EXTRACT_IMAGES = False
+app.state.CHUNK_SIZE = CHUNK_SIZE
+app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
+app.state.RAG_TEMPLATE = RAG_TEMPLATE
+app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
+# Number of results used by query requests that do not supply their own k.
+app.state.TOP_K = 4
+
+# Embedding function passed to collection creation and to the query helpers.
+app.state.sentence_transformer_ef = (
+    embedding_functions.SentenceTransformerEmbeddingFunction(
+        model_name=app.state.RAG_EMBEDDING_MODEL,
+        device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+    )
+)
+
+
origins = ["*"]
app.add_middleware(
@@ -68,9 +111,9 @@ class StoreWebForm(CollectionNameForm):
url: str
-def store_data_in_vector_db(data, collection_name) -> bool:
+def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
text_splitter = RecursiveCharacterTextSplitter(
- chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
+ chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP
)
docs = text_splitter.split_documents(data)
@@ -78,7 +121,16 @@ def store_data_in_vector_db(data, collection_name) -> bool:
metadatas = [doc.metadata for doc in docs]
try:
- collection = CHROMA_CLIENT.create_collection(name=collection_name)
+ if overwrite:
+ for collection in CHROMA_CLIENT.list_collections():
+ if collection_name == collection.name:
+ print(f"deleting existing collection {collection_name}")
+ CHROMA_CLIENT.delete_collection(name=collection_name)
+
+ collection = CHROMA_CLIENT.create_collection(
+ name=collection_name,
+ embedding_function=app.state.sentence_transformer_ef,
+ )
collection.add(
documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts]
@@ -94,26 +146,133 @@ def store_data_in_vector_db(data, collection_name) -> bool:
@app.get("/")
async def get_status():
- return {"status": True}
+ return {
+ "status": True,
+ "chunk_size": app.state.CHUNK_SIZE,
+ "chunk_overlap": app.state.CHUNK_OVERLAP,
+ "template": app.state.RAG_TEMPLATE,
+ "embedding_model": app.state.RAG_EMBEDDING_MODEL,
+ }
+
+
+@app.get("/embedding/model")
+async def get_embedding_model(user=Depends(get_admin_user)):
+    """Report the embedding model name currently stored in app state."""
+    current_model = app.state.RAG_EMBEDDING_MODEL
+    return {"status": True, "embedding_model": current_model}
+
+
+# Request body for POST /embedding/model/update.
+class EmbeddingModelUpdateForm(BaseModel):
+    # Passed as model_name to SentenceTransformerEmbeddingFunction.
+    embedding_model: str
+
+
+@app.post("/embedding/model/update")
+async def update_embedding_model(
+    form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
+):
+    """Switch the embedding model used for storing and querying documents.
+
+    The new embedding function is built before any state is modified, so if
+    its construction raises, the previous model name and embedding function
+    both stay in place instead of ending up as a mismatched pair.
+
+    Returns:
+        A dict with ``status`` and the now-active ``embedding_model`` name.
+    """
+    embedding_function = embedding_functions.SentenceTransformerEmbeddingFunction(
+        model_name=form_data.embedding_model,
+        device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
+    )
+
+    app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
+    app.state.sentence_transformer_ef = embedding_function
+
+    return {
+        "status": True,
+        "embedding_model": app.state.RAG_EMBEDDING_MODEL,
+    }
+
+
+@app.get("/config")
+async def get_rag_config(user=Depends(get_admin_user)):
+    """Return the PDF image-extraction flag and chunking parameters from app state."""
+    state = app.state
+    chunk_settings = {
+        "chunk_size": state.CHUNK_SIZE,
+        "chunk_overlap": state.CHUNK_OVERLAP,
+    }
+    return {
+        "status": True,
+        "pdf_extract_images": state.PDF_EXTRACT_IMAGES,
+        "chunk": chunk_settings,
+    }
+
+
+# Chunking parameters; stored into app.state.CHUNK_SIZE / CHUNK_OVERLAP, which
+# store_data_in_vector_db passes to RecursiveCharacterTextSplitter.
+class ChunkParamUpdateForm(BaseModel):
+    chunk_size: int
+    chunk_overlap: int
+
+
+# Request body for POST /config/update.
+class ConfigUpdateForm(BaseModel):
+    # Forwarded to PyPDFLoader(extract_images=...) when PDFs are loaded.
+    pdf_extract_images: bool
+    chunk: ChunkParamUpdateForm
+
+
+@app.post("/config/update")
+async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_user)):
+    """Store the submitted PDF and chunking settings in app state and echo them back."""
+    state = app.state
+    chunk_form = form_data.chunk
+
+    state.PDF_EXTRACT_IMAGES = form_data.pdf_extract_images
+    state.CHUNK_SIZE = chunk_form.chunk_size
+    state.CHUNK_OVERLAP = chunk_form.chunk_overlap
+
+    chunk_settings = {
+        "chunk_size": state.CHUNK_SIZE,
+        "chunk_overlap": state.CHUNK_OVERLAP,
+    }
+    return {
+        "status": True,
+        "pdf_extract_images": state.PDF_EXTRACT_IMAGES,
+        "chunk": chunk_settings,
+    }
+
+
+@app.get("/template")
+async def get_rag_template(user=Depends(get_current_user)):
+    """Return the RAG prompt template currently stored in app state."""
+    current_template = app.state.RAG_TEMPLATE
+    return {"status": True, "template": current_template}
+
+
+@app.get("/query/settings")
+async def get_query_settings(user=Depends(get_admin_user)):
+    """Return the RAG prompt template and the default result count ``k``."""
+    state = app.state
+    settings = {"status": True}
+    settings["template"] = state.RAG_TEMPLATE
+    settings["k"] = state.TOP_K
+    return settings
+
+
+# Request body for POST /query/settings/update; both fields are optional and
+# a missing (or falsy) value makes the handler fall back to its default.
+class QuerySettingsForm(BaseModel):
+    k: Optional[int] = None
+    template: Optional[str] = None
+
+
+@app.post("/query/settings/update")
+async def update_query_settings(
+    form_data: QuerySettingsForm, user=Depends(get_admin_user)
+):
+    """Update the RAG prompt template and the default number of query results.
+
+    A missing or empty ``template`` restores the configured ``RAG_TEMPLATE``;
+    a missing or zero ``k`` restores the default of 4.
+
+    Returns:
+        A dict with ``status`` and the effective ``template`` and ``k``,
+        the same keys that ``GET /query/settings`` reports.
+    """
+    app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE
+    app.state.TOP_K = form_data.k if form_data.k else 4
+    return {
+        "status": True,
+        "template": app.state.RAG_TEMPLATE,
+        "k": app.state.TOP_K,
+    }
class QueryDocForm(BaseModel):
collection_name: str
query: str
- k: Optional[int] = 4
+ k: Optional[int] = None
@app.post("/query/doc")
-def query_doc(
+def query_doc_handler(
form_data: QueryDocForm,
user=Depends(get_current_user),
):
+
try:
- collection = CHROMA_CLIENT.get_collection(
- name=form_data.collection_name,
+ return query_doc(
+ collection_name=form_data.collection_name,
+ query=form_data.query,
+ k=form_data.k if form_data.k else app.state.TOP_K,
+ embedding_function=app.state.sentence_transformer_ef,
)
- result = collection.query(query_texts=[form_data.query], n_results=form_data.k)
- return result
except Exception as e:
print(e)
raise HTTPException(
@@ -125,74 +284,20 @@ def query_doc(
class QueryCollectionsForm(BaseModel):
collection_names: List[str]
query: str
- k: Optional[int] = 4
-
-
-def merge_and_sort_query_results(query_results, k):
- # Initialize lists to store combined data
- combined_ids = []
- combined_distances = []
- combined_metadatas = []
- combined_documents = []
-
- # Combine data from each dictionary
- for data in query_results:
- combined_ids.extend(data["ids"][0])
- combined_distances.extend(data["distances"][0])
- combined_metadatas.extend(data["metadatas"][0])
- combined_documents.extend(data["documents"][0])
-
- # Create a list of tuples (distance, id, metadata, document)
- combined = list(
- zip(combined_distances, combined_ids, combined_metadatas, combined_documents)
- )
-
- # Sort the list based on distances
- combined.sort(key=lambda x: x[0])
-
- # Unzip the sorted list
- sorted_distances, sorted_ids, sorted_metadatas, sorted_documents = zip(*combined)
-
- # Slicing the lists to include only k elements
- sorted_distances = list(sorted_distances)[:k]
- sorted_ids = list(sorted_ids)[:k]
- sorted_metadatas = list(sorted_metadatas)[:k]
- sorted_documents = list(sorted_documents)[:k]
-
- # Create the output dictionary
- merged_query_results = {
- "ids": [sorted_ids],
- "distances": [sorted_distances],
- "metadatas": [sorted_metadatas],
- "documents": [sorted_documents],
- "embeddings": None,
- "uris": None,
- "data": None,
- }
-
- return merged_query_results
+ k: Optional[int] = None
@app.post("/query/collection")
-def query_collection(
+def query_collection_handler(
form_data: QueryCollectionsForm,
user=Depends(get_current_user),
):
- results = []
-
- for collection_name in form_data.collection_names:
- try:
- collection = CHROMA_CLIENT.get_collection(
- name=collection_name,
- )
- result = collection.query(
- query_texts=[form_data.query], n_results=form_data.k
- )
- results.append(result)
- except:
- pass
-
- return merge_and_sort_query_results(results, form_data.k)
+ return query_collection(
+ collection_names=form_data.collection_names,
+ query=form_data.query,
+ k=form_data.k if form_data.k else app.state.TOP_K,
+ embedding_function=app.state.sentence_transformer_ef,
+ )
@app.post("/web")
@@ -206,7 +311,7 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
if collection_name == "":
collection_name = calculate_sha256_string(form_data.url)[:63]
- store_data_in_vector_db(data, collection_name)
+ store_data_in_vector_db(data, collection_name, overwrite=True)
return {
"status": True,
"collection_name": collection_name,
@@ -220,8 +325,8 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
)
-def get_loader(file, file_path):
- file_ext = file.filename.split(".")[-1].lower()
+def get_loader(filename: str, file_content_type: str, file_path: str):
+ file_ext = filename.split(".")[-1].lower()
known_type = True
known_source_ext = [
@@ -270,7 +375,7 @@ def get_loader(file, file_path):
]
if file_ext == "pdf":
- loader = PyPDFLoader(file_path)
+ loader = PyPDFLoader(file_path, extract_images=app.state.PDF_EXTRACT_IMAGES)
elif file_ext == "csv":
loader = CSVLoader(file_path)
elif file_ext == "rst":
@@ -279,23 +384,25 @@ def get_loader(file, file_path):
loader = UnstructuredXMLLoader(file_path)
elif file_ext == "md":
loader = UnstructuredMarkdownLoader(file_path)
- elif file.content_type == "application/epub+zip":
+ elif file_content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
elif (
- file.content_type
+ file_content_type
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
or file_ext in ["doc", "docx"]
):
loader = Docx2txtLoader(file_path)
- elif file.content_type in [
+ elif file_content_type in [
"application/vnd.ms-excel",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
] or file_ext in ["xls", "xlsx"]:
loader = UnstructuredExcelLoader(file_path)
- elif file_ext in known_source_ext or file.content_type.find("text/") >= 0:
- loader = TextLoader(file_path)
+ elif file_ext in known_source_ext or (
+ file_content_type and file_content_type.find("text/") >= 0
+ ):
+ loader = TextLoader(file_path, autodetect_encoding=True)
else:
- loader = TextLoader(file_path)
+ loader = TextLoader(file_path, autodetect_encoding=True)
known_type = False
return loader, known_type
@@ -323,7 +430,7 @@ def store_doc(
collection_name = calculate_sha256(f)[:63]
f.close()
- loader, known_type = get_loader(file, file_path)
+ loader, known_type = get_loader(file.filename, file.content_type, file_path)
data = loader.load()
result = store_data_in_vector_db(data, collection_name)
@@ -353,6 +460,63 @@ def store_doc(
)
+@app.get("/scan")
+def scan_docs_dir(user=Depends(get_admin_user)):
+    """Index every non-hidden file under ``DOCS_DIR`` and register new documents.
+
+    For each regular file whose name does not start with ".", the file is
+    hashed (the first 63 characters of the digest become the collection
+    name), loaded with the loader chosen by ``get_loader`` and stored in the
+    vector DB.  When storing succeeds and no document with the sanitized
+    file name exists yet, a document record is inserted for ``user`` with
+    the tags returned by ``extract_folders_after_data_docs``.
+
+    A failure on one file is printed and the scan moves on to the next file.
+
+    Returns:
+        Always ``True``.
+    """
+    for path in Path(DOCS_DIR).rglob("./**/*"):
+        try:
+            if not path.is_file() or path.name.startswith("."):
+                continue
+
+            tags = extract_folders_after_data_docs(path)
+            filename = path.name
+            file_content_type = mimetypes.guess_type(path)
+
+            # The context manager closes the handle even if hashing raises.
+            with open(path, "rb") as f:
+                collection_name = calculate_sha256(f)[:63]
+
+            loader, known_type = get_loader(
+                filename, file_content_type[0], str(path)
+            )
+            data = loader.load()
+
+            if not store_data_in_vector_db(data, collection_name):
+                continue
+
+            sanitized_filename = sanitize_filename(filename)
+            if Documents.get_doc_by_name(sanitized_filename) is not None:
+                # Already registered under this name; leave the record alone.
+                continue
+
+            content = (
+                json.dumps({"tags": [{"name": name} for name in tags]})
+                if len(tags)
+                else "{}"
+            )
+            Documents.insert_new_doc(
+                user.id,
+                DocumentForm(
+                    **{
+                        "name": sanitized_filename,
+                        "title": filename,
+                        "collection_name": collection_name,
+                        "filename": filename,
+                        "content": content,
+                    }
+                ),
+            )
+
+        except Exception as e:
+            print(e)
+
+    return True
+
+
@app.get("/reset/db")
def reset_vector_db(user=Depends(get_admin_user)):
CHROMA_CLIENT.reset()
diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py
new file mode 100644
index 00000000..a3537d4d
--- /dev/null
+++ b/backend/apps/rag/utils.py
@@ -0,0 +1,182 @@
+import re
+from typing import List
+
+from config import CHROMA_CLIENT
+
+
+def query_doc(collection_name: str, query: str, k: int, embedding_function):
+    """Query a single collection for the ``k`` nearest results to ``query``.
+
+    Args:
+        collection_name: Name of the collection to look up in ``CHROMA_CLIENT``.
+        query: Text passed as the only entry of ``query_texts``.
+        k: Value passed as ``n_results``.
+        embedding_function: Embedding function handed to ``get_collection``.
+
+    Returns:
+        Whatever ``collection.query`` returns.
+
+    Raises:
+        Any exception raised by the client propagates unchanged to the caller.
+    """
+    # if you use docker use the model from the environment variable
+    collection = CHROMA_CLIENT.get_collection(
+        name=collection_name,
+        embedding_function=embedding_function,
+    )
+    return collection.query(
+        query_texts=[query],
+        n_results=k,
+    )
+
+
+def merge_and_sort_query_results(query_results, k):
+    """Merge several query results and keep the ``k`` entries with the smallest distance.
+
+    Each element of ``query_results`` is read as a mapping whose ``"ids"``,
+    ``"distances"``, ``"metadatas"`` and ``"documents"`` values are lists
+    with the hits for the (single) query at index 0.
+
+    Args:
+        query_results: Iterable of such result mappings; may be empty.
+        k: Maximum number of merged hits to keep.
+
+    Returns:
+        A dict of the same shape holding the merged hits sorted by ascending
+        distance.  With no hits at all, every per-query list is empty.
+    """
+    combined = []
+    for data in query_results:
+        combined.extend(
+            zip(
+                data["distances"][0],
+                data["ids"][0],
+                data["metadatas"][0],
+                data["documents"][0],
+            )
+        )
+
+    # Sort on the distance only: ties keep their input order and metadata
+    # dicts never have to be compared.
+    combined.sort(key=lambda hit: hit[0])
+
+    # Slicing first and building each list with a comprehension also works
+    # for zero hits, where unpacking zip(*combined) would raise ValueError.
+    top_hits = combined[:k]
+
+    return {
+        "ids": [[hit[1] for hit in top_hits]],
+        "distances": [[hit[0] for hit in top_hits]],
+        "metadatas": [[hit[2] for hit in top_hits]],
+        "documents": [[hit[3] for hit in top_hits]],
+        "embeddings": None,
+        "uris": None,
+        "data": None,
+    }
+
+
+def query_collection(
+    collection_names: List[str], query: str, k: int, embedding_function
+):
+    """Query several collections and merge their hits into one ranked result.
+
+    Every collection is asked for ``k`` results; the per-collection results
+    are then combined by ``merge_and_sort_query_results``.  A collection
+    that cannot be fetched or queried is skipped so the others still
+    contribute, and the error is printed rather than silently discarded.
+
+    Args:
+        collection_names: Names of the collections to query.
+        query: Text passed as the only entry of ``query_texts``.
+        k: Number of results requested per collection and kept after merging.
+        embedding_function: Embedding function handed to ``get_collection``.
+    """
+    results = []
+
+    for collection_name in collection_names:
+        try:
+            # if you use docker use the model from the environment variable
+            collection = CHROMA_CLIENT.get_collection(
+                name=collection_name,
+                embedding_function=embedding_function,
+            )
+            results.append(
+                collection.query(
+                    query_texts=[query],
+                    n_results=k,
+                )
+            )
+        except Exception as e:
+            # Best effort: keep going, but leave a trace of what was skipped.
+            print(f"query_collection: skipping {collection_name}: {e}")
+
+    return merge_and_sort_query_results(results, k)
+
+
+def rag_template(template: str, context: str, query: str):
+    """Fill the ``[context]`` and ``[query]`` placeholders of a prompt template."""
+    # Order matters: the context goes in first, so a literal "[query]" inside
+    # the context text is substituted as well.
+    with_context = template.replace("[context]", context)
+    return with_context.replace("[query]", query)
+
+
+def rag_messages(docs, messages, template, k, embedding_function):
+    """Rewrite the last user message so it carries retrieved context.
+
+    The text of the last message with role ``"user"`` is used as the query.
+    Each entry of ``docs`` is looked up with ``query_collection`` (when its
+    ``"type"`` is ``"collection"``) or ``query_doc`` (otherwise); the
+    returned documents are joined into one context string, and the user
+    message text is replaced by ``template`` filled with that context and
+    the query.
+
+    Args:
+        docs: Iterable of dicts describing what to search.
+        messages: Chat messages; the list is modified in place.
+        template: Prompt template with ``[context]`` / ``[query]`` markers.
+        k: Number of results requested per lookup.
+        embedding_function: Embedding function forwarded to the lookups.
+
+    Returns:
+        ``messages``, unchanged when it contains no user message.
+    """
+    print(docs)
+
+    last_user_message_idx = next(
+        (
+            i
+            for i in range(len(messages) - 1, -1, -1)
+            if messages[i]["role"] == "user"
+        ),
+        None,
+    )
+
+    if last_user_message_idx is None:
+        # No user message means no query to retrieve for; indexing with None
+        # would raise TypeError, so hand the conversation back untouched.
+        return messages
+
+    user_message = messages[last_user_message_idx]
+
+    if isinstance(user_message["content"], list):
+        # Handle list content input: the first text item is the query
+        content_type = "list"
+        query = ""
+        for content_item in user_message["content"]:
+            if content_item["type"] == "text":
+                query = content_item["text"]
+                break
+    elif isinstance(user_message["content"], str):
+        # Handle text content input
+        content_type = "text"
+        query = user_message["content"]
+    else:
+        # Fallback in case the input does not match expected types
+        content_type = None
+        query = ""
+
+    relevant_contexts = []
+
+    for doc in docs:
+        context = None
+
+        try:
+            if doc["type"] == "collection":
+                context = query_collection(
+                    collection_names=doc["collection_names"],
+                    query=query,
+                    k=k,
+                    embedding_function=embedding_function,
+                )
+            else:
+                context = query_doc(
+                    collection_name=doc["collection_name"],
+                    query=query,
+                    k=k,
+                    embedding_function=embedding_function,
+                )
+        except Exception as e:
+            # A failed lookup contributes no context but does not abort the rest.
+            print(e)
+            context = None
+
+        relevant_contexts.append(context)
+
+    context_string = "".join(
+        " ".join(context["documents"][0]) + "\n"
+        for context in relevant_contexts
+        if context
+    )
+
+    ra_content = rag_template(
+        template=template,
+        context=context_string,
+        query=query,
+    )
+
+    if content_type == "list":
+        # Every text item is replaced by the filled template; other item
+        # types are kept as they are.
+        new_content = [
+            {"type": "text", "text": ra_content}
+            if content_item["type"] == "text"
+            else content_item
+            for content_item in user_message["content"]
+        ]
+        new_user_message = {**user_message, "content": new_content}
+    else:
+        new_user_message = {
+            **user_message,
+            "content": ra_content,
+        }
+
+    messages[last_user_message_idx] = new_user_message
+
+    return messages
diff --git a/backend/apps/web/internal/db.py b/backend/apps/web/internal/db.py
index 1f8c3bf7..d0aa9969 100644
--- a/backend/apps/web/internal/db.py
+++ b/backend/apps/web/internal/db.py
@@ -1,6 +1,16 @@
from peewee import *
from config import DATA_DIR
+import os
-DB = SqliteDatabase(f"{DATA_DIR}/ollama.db")
+# Rename a legacy "ollama.db" in DATA_DIR to "webui.db" if one is present.
+_legacy_db_path = f"{DATA_DIR}/ollama.db"
+if os.path.exists(_legacy_db_path):
+    os.rename(_legacy_db_path, f"{DATA_DIR}/webui.db")
+    print("File renamed successfully.")
+
+
+DB = SqliteDatabase(f"{DATA_DIR}/webui.db")
DB.connect()
diff --git a/backend/apps/web/main.py b/backend/apps/web/main.py
index 400ddac0..dd5c0c70 100644
--- a/backend/apps/web/main.py
+++ b/backend/apps/web/main.py
@@ -19,6 +19,7 @@ from config import (
DEFAULT_USER_ROLE,
ENABLE_SIGNUP,
USER_PERMISSIONS,
+ WEBHOOK_URL,
)
app = FastAPI()
@@ -26,10 +27,13 @@ app = FastAPI()
origins = ["*"]
app.state.ENABLE_SIGNUP = ENABLE_SIGNUP
+app.state.JWT_EXPIRES_IN = "-1"
+
app.state.DEFAULT_MODELS = DEFAULT_MODELS
app.state.DEFAULT_PROMPT_SUGGESTIONS = DEFAULT_PROMPT_SUGGESTIONS
app.state.DEFAULT_USER_ROLE = DEFAULT_USER_ROLE
app.state.USER_PERMISSIONS = USER_PERMISSIONS
+app.state.WEBHOOK_URL = WEBHOOK_URL
app.add_middleware(
@@ -55,7 +59,6 @@ app.include_router(utils.router, prefix="/utils", tags=["utils"])
async def get_status():
return {
"status": True,
- "version": WEBUI_VERSION,
"auth": WEBUI_AUTH,
"default_models": app.state.DEFAULT_MODELS,
"default_prompt_suggestions": app.state.DEFAULT_PROMPT_SUGGESTIONS,
diff --git a/backend/apps/web/models/tags.py b/backend/apps/web/models/tags.py
index c14658cf..d4264501 100644
--- a/backend/apps/web/models/tags.py
+++ b/backend/apps/web/models/tags.py
@@ -167,6 +167,27 @@ class TagTable:
.count()
)
+    def delete_tag_by_tag_name_and_user_id(self, tag_name: str, user_id: str) -> bool:
+        """Detach a user's tag from all chats and drop the tag once nothing uses it.
+
+        Returns:
+            ``True`` on success, ``False`` if any step raised (the error is printed).
+        """
+        try:
+            owned_links = (ChatIdTag.tag_name == tag_name) & (
+                ChatIdTag.user_id == user_id
+            )
+            # execute() removes the rows and returns how many were removed.
+            removed_count = ChatIdTag.delete().where(owned_links).execute()
+            print(removed_count)
+
+            remaining = self.count_chat_ids_by_tag_name_and_user_id(tag_name, user_id)
+            if remaining == 0:
+                # No chat references the tag any more: remove the Tag row too.
+                owned_tag = (Tag.name == tag_name) & (Tag.user_id == user_id)
+                Tag.delete().where(owned_tag).execute()
+
+            return True
+        except Exception as e:
+            print("delete_tag", e)
+            return False
+
def delete_tag_by_tag_name_and_chat_id_and_user_id(
self, tag_name: str, chat_id: str, user_id: str
) -> bool:
diff --git a/backend/apps/web/routers/auths.py b/backend/apps/web/routers/auths.py
index 7ccef630..d881ec74 100644
--- a/backend/apps/web/routers/auths.py
+++ b/backend/apps/web/routers/auths.py
@@ -7,6 +7,7 @@ from fastapi import APIRouter, status
from pydantic import BaseModel
import time
import uuid
+import re
from apps.web.models.auths import (
SigninForm,
@@ -25,8 +26,9 @@ from utils.utils import (
get_admin_user,
create_token,
)
-from utils.misc import get_gravatar_url, validate_email_format
-from constants import ERROR_MESSAGES
+from utils.misc import parse_duration, validate_email_format
+from utils.webhook import post_webhook
+from constants import ERROR_MESSAGES, WEBHOOK_MESSAGES
router = APIRouter()
@@ -95,10 +97,13 @@ async def update_password(
@router.post("/signin", response_model=SigninResponse)
-async def signin(form_data: SigninForm):
+async def signin(request: Request, form_data: SigninForm):
user = Auths.authenticate_user(form_data.email.lower(), form_data.password)
if user:
- token = create_token(data={"id": user.id})
+ token = create_token(
+ data={"id": user.id},
+ expires_delta=parse_duration(request.app.state.JWT_EXPIRES_IN),
+ )
return {
"token": token,
@@ -145,9 +150,23 @@ async def signup(request: Request, form_data: SignupForm):
)
if user:
- token = create_token(data={"id": user.id})
+ token = create_token(
+ data={"id": user.id},
+ expires_delta=parse_duration(request.app.state.JWT_EXPIRES_IN),
+ )
# response.set_cookie(key='token', value=token, httponly=True)
+ if request.app.state.WEBHOOK_URL:
+ post_webhook(
+ request.app.state.WEBHOOK_URL,
+ WEBHOOK_MESSAGES.USER_SIGNUP(user.name),
+ {
+ "action": "signup",
+ "message": WEBHOOK_MESSAGES.USER_SIGNUP(user.name),
+ "user": user.model_dump_json(exclude_none=True),
+ },
+ )
+
return {
"token": token,
"token_type": "Bearer",
@@ -200,3 +219,33 @@ async def update_default_user_role(
if form_data.role in ["pending", "user", "admin"]:
request.app.state.DEFAULT_USER_ROLE = form_data.role
return request.app.state.DEFAULT_USER_ROLE
+
+
+############################
+# JWT Expiration
+############################
+
+
+@router.get("/token/expires")
+async def get_token_expires_duration(request: Request, user=Depends(get_admin_user)):
+    """Return the JWT lifetime string currently stored in app state."""
+    app_state = request.app.state
+    return app_state.JWT_EXPIRES_IN
+
+
+# Request body for POST /token/expires/update.
+class UpdateJWTExpiresDurationForm(BaseModel):
+    # Duration string; the handler only stores values matching its pattern
+    # ("-1", "0", or a number followed by ms/s/m/h/d/w).
+    duration: str
+
+
+@router.post("/token/expires/update")
+async def update_token_expires_duration(
+    request: Request,
+    form_data: UpdateJWTExpiresDurationForm,
+    user=Depends(get_admin_user),
+):
+    """Store a new JWT lifetime if ``duration`` is well formed.
+
+    Accepted values are ``-1``, ``0``, or an optionally negative, optionally
+    fractional number followed by one of ``ms``, ``s``, ``m``, ``h``, ``d``,
+    ``w``.  Anything else is ignored.
+
+    Returns:
+        The lifetime string in effect after the call, whether or not it changed.
+    """
+    pattern = r"^(-1|0|(-?\d+(\.\d+)?)(ms|s|m|h|d|w))$"
+
+    # fullmatch instead of match: with match, "$" also accepts a trailing
+    # newline (e.g. "1h\n"), which would then be stored verbatim.
+    if re.fullmatch(pattern, form_data.duration):
+        request.app.state.JWT_EXPIRES_IN = form_data.duration
+
+    return request.app.state.JWT_EXPIRES_IN
diff --git a/backend/apps/web/routers/chats.py b/backend/apps/web/routers/chats.py
index 00dcfb6e..0c0ac1ce 100644
--- a/backend/apps/web/routers/chats.py
+++ b/backend/apps/web/routers/chats.py
@@ -115,9 +115,12 @@ async def get_user_chats_by_tag_name(
for chat_id_tag in Tags.get_chat_ids_by_tag_name_and_user_id(tag_name, user.id)
]
- print(chat_ids)
+ chats = Chats.get_chat_lists_by_chat_ids(chat_ids, skip, limit)
- return Chats.get_chat_lists_by_chat_ids(chat_ids, skip, limit)
+ if len(chats) == 0:
+ Tags.delete_tag_by_tag_name_and_user_id(tag_name, user.id)
+
+ return chats
############################
@@ -268,6 +271,16 @@ async def delete_all_chat_tags_by_id(id: str, user=Depends(get_current_user)):
@router.delete("/", response_model=bool)
-async def delete_all_user_chats(user=Depends(get_current_user)):
+async def delete_all_user_chats(request: Request, user=Depends(get_current_user)):
+
+ if (
+ user.role == "user"
+ and not request.app.state.USER_PERMISSIONS["chat"]["deletion"]
+ ):
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail=ERROR_MESSAGES.ACCESS_PROHIBITED,
+ )
+
result = Chats.delete_chats_by_user_id(user.id)
return result
diff --git a/backend/apps/web/routers/documents.py b/backend/apps/web/routers/documents.py
index 5bc473fa..7c69514f 100644
--- a/backend/apps/web/routers/documents.py
+++ b/backend/apps/web/routers/documents.py
@@ -96,6 +96,10 @@ async def get_doc_by_name(name: str, user=Depends(get_current_user)):
############################
+# A single tag, identified by its name.
+class TagItem(BaseModel):
+    name: str
+
+
class TagDocumentForm(BaseModel):
name: str
tags: List[dict]
diff --git a/backend/apps/web/routers/utils.py b/backend/apps/web/routers/utils.py
index 86e1a9e5..0d34b040 100644
--- a/backend/apps/web/routers/utils.py
+++ b/backend/apps/web/routers/utils.py
@@ -1,6 +1,7 @@
from fastapi import APIRouter, UploadFile, File, BackgroundTasks
from fastapi import Depends, HTTPException, status
-from starlette.responses import StreamingResponse
+from starlette.responses import StreamingResponse, FileResponse
+
from pydantic import BaseModel
@@ -9,9 +10,11 @@ import os
import aiohttp
import json
+
+from utils.utils import get_admin_user
from utils.misc import calculate_sha256, get_gravatar_url
-from config import OLLAMA_API_BASE_URL, DATA_DIR, UPLOAD_DIR
+from config import OLLAMA_BASE_URLS, DATA_DIR, UPLOAD_DIR
from constants import ERROR_MESSAGES
@@ -72,7 +75,7 @@ async def download_file_stream(url, file_path, file_name, chunk_size=1024 * 1024
hashed = calculate_sha256(file)
file.seek(0)
- url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}"
+ url = f"{OLLAMA_BASE_URLS[0]}/api/blobs/sha256:{hashed}"
response = requests.post(url, data=file)
if response.ok:
@@ -144,7 +147,7 @@ def upload(file: UploadFile = File(...)):
hashed = calculate_sha256(f)
f.seek(0)
- url = f"{OLLAMA_API_BASE_URL}/blobs/sha256:{hashed}"
+ url = f"{OLLAMA_BASE_URLS[0]}/blobs/sha256:{hashed}"
response = requests.post(url, data=f)
if response.ok:
@@ -172,3 +175,13 @@ async def get_gravatar(
email: str,
):
return get_gravatar_url(email)
+
+
+@router.get("/db/download")
+async def download_db(user=Depends(get_admin_user)):
+    """Send ``webui.db`` from ``DATA_DIR`` as a binary file download."""
+    db_path = f"{DATA_DIR}/webui.db"
+    return FileResponse(
+        db_path,
+        filename="webui.db",
+        media_type="application/octet-stream",
+    )
diff --git a/backend/config.py b/backend/config.py
index d7c89b3b..9236e8a8 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -1,10 +1,20 @@
import os
import chromadb
from chromadb import Settings
-from secrets import token_bytes
from base64 import b64encode
-from constants import ERROR_MESSAGES
+from bs4 import BeautifulSoup
+
from pathlib import Path
+import json
+import yaml
+
+import markdown
+import requests
+import shutil
+
+from secrets import token_bytes
+from constants import ERROR_MESSAGES
+
try:
from dotenv import load_dotenv, find_dotenv
@@ -13,6 +23,8 @@ try:
except ImportError:
print("dotenv not installed, skipping...")
+WEBUI_NAME = "Open WebUI"
+shutil.copyfile("../build/favicon.png", "./static/favicon.png")
####################################
# ENV (dev,test,prod)
@@ -20,6 +32,102 @@ except ImportError:
ENV = os.environ.get("ENV", "dev")
+# Read the application version from the frontend's package.json.
+try:
+    with open("../package.json", "r") as f:
+        PACKAGE_DATA = json.load(f)
+except (OSError, ValueError):
+    # Missing/unreadable file or invalid JSON: fall back to a placeholder
+    # version.  Unlike a bare except, KeyboardInterrupt/SystemExit pass through.
+    PACKAGE_DATA = {"version": "0.0.0"}
+
+VERSION = PACKAGE_DATA["version"]
+
+
+# Function to parse each section
+def parse_section(section):
+    """Turn the ``<li>`` items of one changelog section into dictionaries.
+
+    Args:
+        section: Element exposing ``find_all("li")`` (the section's list), or
+            ``None`` when the section has no list.
+
+    Returns:
+        One dict per item with ``"title"`` (the text before the first
+        ``": "``, or ``""`` when there is none), ``"content"`` (the rest, or
+        the whole text) and ``"raw"`` (the item's HTML string).  An empty
+        list when ``section`` is ``None``.
+    """
+    if section is None:
+        return []
+
+    items = []
+    for li in section.find_all("li"):
+        # Extract raw HTML string
+        raw_html = str(li)
+
+        # Extract text without HTML tags
+        text = li.get_text(separator=" ", strip=True)
+
+        # Split into title and content on the first ": " only
+        title, separator, rest = text.partition(": ")
+        if separator:
+            title, content = title.strip(), rest.strip()
+        else:
+            title, content = "", text
+
+        items.append({"title": title, "content": content, "raw": raw_html})
+    return items
+
+
+# Load CHANGELOG.md and convert it into a dict keyed by version number.
+try:
+    with open("../CHANGELOG.md", "r") as file:
+        changelog_content = file.read()
+except (OSError, ValueError):
+    # Missing, unreadable or undecodable file: behave as an empty changelog.
+    changelog_content = ""
+
+# Convert markdown content to HTML
+html_content = markdown.markdown(changelog_content)
+
+# Parse the HTML content
+soup = BeautifulSoup(html_content, "html.parser")
+
+# Initialize JSON structure
+changelog_json = {}
+
+# Iterate over each version
+for version in soup.find_all("h2"):
+    # The heading is split on " - " into a bracketed version and a date; a
+    # heading without that separator gets an empty date instead of raising
+    # IndexError during import.
+    heading_parts = version.get_text().strip().split(" - ")
+    version_number = heading_parts[0][1:-1]  # Remove brackets
+    date = heading_parts[1] if len(heading_parts) > 1 else ""
+
+    version_data = {"date": date}
+
+    # Find the next sibling that is a h3 tag (section title)
+    current = version.find_next_sibling()
+
+    while current and current.name != "h2":
+        if current.name == "h3":
+            section_title = current.get_text().lower()  # e.g., "added", "fixed"
+            section_list = current.find_next_sibling("ul")
+            # A section title with no list after it yields no items.
+            section_items = (
+                parse_section(section_list) if section_list is not None else []
+            )
+            version_data[section_title] = section_items
+
+        # Move to the next element
+        current = current.find_next_sibling()
+
+    changelog_json[version_number] = version_data
+
+
+CHANGELOG = changelog_json
+
+
+####################################
+# CUSTOM_NAME
+####################################
+
+CUSTOM_NAME = os.environ.get("CUSTOM_NAME", "")
+if CUSTOM_NAME:
+    # These requests run at import time and requests applies no timeout by
+    # default, so both calls are bounded to keep an unreachable server from
+    # blocking startup indefinitely.  Any failure is printed and the default
+    # name and favicon stay in place.
+    try:
+        r = requests.get(
+            f"https://api.openwebui.com/api/v1/custom/{CUSTOM_NAME}",
+            timeout=10,
+        )
+        data = r.json()
+        if r.ok:
+            if "logo" in data:
+                # A logo starting with "/" is relative to the API host.
+                url = (
+                    f"https://api.openwebui.com{data['logo']}"
+                    if data["logo"][0] == "/"
+                    else data["logo"]
+                )
+
+                r = requests.get(url, stream=True, timeout=10)
+                if r.status_code == 200:
+                    with open("./static/favicon.png", "wb") as f:
+                        r.raw.decode_content = True
+                        shutil.copyfileobj(r.raw, f)
+
+            WEBUI_NAME = data["name"]
+    except Exception as e:
+        print(e)
+
####################################
# DATA/FRONTEND BUILD DIR
@@ -28,6 +136,12 @@ ENV = os.environ.get("ENV", "dev")
DATA_DIR = str(Path(os.getenv("DATA_DIR", "./data")).resolve())
FRONTEND_BUILD_DIR = str(Path(os.getenv("FRONTEND_BUILD_DIR", "../build")))
+# Optional JSON configuration stored next to the data files.
+try:
+    with open(f"{DATA_DIR}/config.json", "r") as f:
+        CONFIG_DATA = json.load(f)
+except (OSError, ValueError):
+    # Missing/unreadable file or invalid JSON: continue with an empty config.
+    # Unlike a bare except, KeyboardInterrupt/SystemExit pass through.
+    CONFIG_DATA = {}
+
####################################
# File Upload DIR
####################################
@@ -43,17 +157,76 @@ Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
CACHE_DIR = f"{DATA_DIR}/cache"
Path(CACHE_DIR).mkdir(parents=True, exist_ok=True)
+
####################################
-# OLLAMA_API_BASE_URL
+# Docs DIR
+####################################
+
+DOCS_DIR = f"{DATA_DIR}/docs"
+Path(DOCS_DIR).mkdir(parents=True, exist_ok=True)  # created if missing; no error when it already exists
+
+
+####################################
+# LITELLM_CONFIG
+####################################
+
+
+def create_config_file(file_path):
+    """Write an empty LiteLLM config skeleton to file_path as YAML, creating parent directories."""
+    directory = os.path.dirname(file_path)
+    # A bare filename has no directory part, and os.makedirs("") would raise.
+    if directory:
+        os.makedirs(directory, exist_ok=True)
+
+    # Data to write into the YAML file
+    config_data = {
+        "general_settings": {},
+        "litellm_settings": {},
+        "model_list": [],
+        "router_settings": {},
+    }
+
+    # Write data to YAML file
+    with open(file_path, "w") as file:
+        yaml.dump(config_data, file)
+
+
+LITELLM_CONFIG_PATH = f"{DATA_DIR}/litellm/config.yaml"
+
+if not os.path.exists(LITELLM_CONFIG_PATH):  # an existing config file is left untouched
+ print("Config file doesn't exist. Creating...")
+ create_config_file(LITELLM_CONFIG_PATH)
+ print("Config file created successfully.")
+
+
+####################################
+# OLLAMA_BASE_URL
####################################
OLLAMA_API_BASE_URL = os.environ.get(
"OLLAMA_API_BASE_URL", "http://localhost:11434/api"
)
+OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
+
+# When OLLAMA_BASE_URL is empty, derive it from OLLAMA_API_BASE_URL minus a trailing "/api".
+if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
+ OLLAMA_BASE_URL = (
+ OLLAMA_API_BASE_URL[:-4]
+ if OLLAMA_API_BASE_URL.endswith("/api")
+ else OLLAMA_API_BASE_URL
+ )
+
if ENV == "prod":
- if OLLAMA_API_BASE_URL == "/ollama/api":
- OLLAMA_API_BASE_URL = "http://host.docker.internal:11434/api"
+ if OLLAMA_BASE_URL == "/ollama":
+ OLLAMA_BASE_URL = "http://host.docker.internal:11434"
+
+
+OLLAMA_BASE_URLS = os.environ.get("OLLAMA_BASE_URLS", "")
+OLLAMA_BASE_URLS = OLLAMA_BASE_URLS if OLLAMA_BASE_URLS != "" else OLLAMA_BASE_URL
+# Split the ";"-separated string into a list; a single URL yields a one-element list.
+OLLAMA_BASE_URLS = [url.strip() for url in OLLAMA_BASE_URLS.split(";")]
+
####################################
# OPENAI_API
@@ -62,19 +235,40 @@ if ENV == "prod":
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_API_BASE_URL = os.environ.get("OPENAI_API_BASE_URL", "")
+
if OPENAI_API_BASE_URL == "":
OPENAI_API_BASE_URL = "https://api.openai.com/v1"
+OPENAI_API_KEYS = os.environ.get("OPENAI_API_KEYS", "")
+OPENAI_API_KEYS = OPENAI_API_KEYS if OPENAI_API_KEYS != "" else OPENAI_API_KEY
+# Split the ";"-separated string into a list of keys (the loop variable holds a key, not a URL).
+OPENAI_API_KEYS = [url.strip() for url in OPENAI_API_KEYS.split(";")]
+
+
+OPENAI_API_BASE_URLS = os.environ.get("OPENAI_API_BASE_URLS", "")
+OPENAI_API_BASE_URLS = (
+    OPENAI_API_BASE_URLS if OPENAI_API_BASE_URLS != "" else OPENAI_API_BASE_URL
+)
+# Empty or whitespace-only entries of the ";"-separated list fall back to the default endpoint.
+OPENAI_API_BASE_URLS = [
+    url.strip() or "https://api.openai.com/v1"
+    for url in OPENAI_API_BASE_URLS.split(";")
+]
####################################
# WEBUI
####################################
-ENABLE_SIGNUP = os.environ.get("ENABLE_SIGNUP", True)
+ENABLE_SIGNUP = os.environ.get("ENABLE_SIGNUP", "True").lower() == "true"
DEFAULT_MODELS = os.environ.get("DEFAULT_MODELS", None)
-DEFAULT_PROMPT_SUGGESTIONS = os.environ.get(
- "DEFAULT_PROMPT_SUGGESTIONS",
- [
+
+
+DEFAULT_PROMPT_SUGGESTIONS = (
+ CONFIG_DATA["ui"]["prompt_suggestions"]
+ if "ui" in CONFIG_DATA
+ and "prompt_suggestions" in CONFIG_DATA["ui"]
+ and type(CONFIG_DATA["ui"]["prompt_suggestions"]) is list
+ else [
{
"title": ["Help me study", "vocabulary for a college entrance exam"],
"content": "Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option.",
@@ -91,12 +285,25 @@ DEFAULT_PROMPT_SUGGESTIONS = os.environ.get(
"title": ["Show me a code snippet", "of a website's sticky header"],
"content": "Show me a code snippet of a website's sticky header in CSS and JavaScript.",
},
- ],
+ ]
)
-DEFAULT_USER_ROLE = "pending"
-USER_PERMISSIONS = {"chat": {"deletion": True}}
+DEFAULT_USER_ROLE = os.getenv("DEFAULT_USER_ROLE", "pending")
+# The flag is True unless USER_PERMISSIONS_CHAT_DELETION is set to something other than "true" (case-insensitive).
+USER_PERMISSIONS_CHAT_DELETION = (
+ os.environ.get("USER_PERMISSIONS_CHAT_DELETION", "True").lower() == "true"
+)
+
+USER_PERMISSIONS = {"chat": {"deletion": USER_PERMISSIONS_CHAT_DELETION}}
+
+
+MODEL_FILTER_ENABLED = os.environ.get("MODEL_FILTER_ENABLED", "False").lower() == "true"
+MODEL_FILTER_LIST = os.environ.get("MODEL_FILTER_LIST", "")
+MODEL_FILTER_LIST = [model.strip() for model in MODEL_FILTER_LIST.split(";")]  # NOTE: [""] (not []) when the variable is unset or empty
+
+WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")
+
####################################
# WEBUI_VERSION
####################################
@@ -128,7 +335,12 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
####################################
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
-EMBED_MODEL = "all-MiniLM-L6-v2"
+# This uses the model defined in the Dockerfile ENV variable. If you don't use Docker or Docker-based deployments such as k8s, the default embedding model (all-MiniLM-L6-v2) will be used.
+RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
+# Device type for embedding models - "cpu" (default), "cuda" (NVIDIA GPU required) or "mps" (Apple silicon) - choosing the right one can lead to better performance
+RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get(
+ "RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu"
+)
CHROMA_CLIENT = chromadb.PersistentClient(
path=CHROMA_DATA_PATH,
settings=Settings(allow_reset=True, anonymized_telemetry=False),
@@ -136,9 +348,31 @@ CHROMA_CLIENT = chromadb.PersistentClient(
CHUNK_SIZE = 1500
CHUNK_OVERLAP = 100
+
+RAG_TEMPLATE = """Use the following context as your learned knowledge, inside
{message.content}-