open-webui/backend/apps/litellm/main.py

373 lines
10 KiB
Python
Raw Normal View History

import sys
2024-04-21 08:13:24 +02:00
from fastapi import FastAPI, Depends, HTTPException
2024-04-21 07:52:27 +02:00
from fastapi.routing import APIRoute
from fastapi.middleware.cors import CORSMiddleware
2024-03-08 22:33:56 +01:00
2024-04-21 07:52:27 +02:00
import logging
2024-03-21 03:28:33 +01:00
from fastapi import FastAPI, Request, Depends, status, Response
2024-03-08 22:33:56 +01:00
from fastapi.responses import JSONResponse
2024-03-21 03:28:33 +01:00
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.responses import StreamingResponse
import json
2024-04-22 00:37:59 +02:00
import time
2024-04-21 08:13:24 +02:00
import requests
2024-03-21 03:28:33 +01:00
2024-04-22 01:25:53 +02:00
from pydantic import BaseModel, ConfigDict
2024-04-21 23:10:01 +02:00
from typing import Optional, List
2024-04-21 08:51:38 +02:00
from utils.utils import get_verified_user, get_current_user, get_admin_user
from config import SRC_LOG_LEVELS, ENV
2024-04-22 00:37:59 +02:00
from constants import MESSAGES
import os
# Module-level logger; its verbosity is taken from the "LITELLM" entry of
# SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(level=SRC_LOG_LEVELS["LITELLM"])
2024-03-08 22:33:56 +01:00
2024-03-21 03:28:33 +01:00
from config import (
2024-04-28 03:17:19 +02:00
ENABLE_LITELLM,
ENABLE_MODEL_FILTER,
MODEL_FILTER_LIST,
DATA_DIR,
LITELLM_PROXY_PORT,
LITELLM_PROXY_HOST,
)
2024-03-21 03:28:33 +01:00
2024-04-22 01:25:53 +02:00
from litellm.utils import get_llm_provider
2024-03-21 03:28:33 +01:00
2024-04-21 07:52:27 +02:00
import asyncio
import subprocess
2024-04-21 23:10:01 +02:00
import yaml
2024-03-08 22:33:56 +01:00
2024-04-21 07:52:27 +02:00
# FastAPI application that fronts the LiteLLM proxy.
app = FastAPI()

# CORS is left wide open: any origin, method and header is accepted, and
# credentials are allowed.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=origins,
    allow_headers=["*"],
    allow_methods=["*"],
)
2024-03-08 22:33:56 +01:00
2024-04-21 07:52:27 +02:00
2024-04-21 23:10:01 +02:00
# Path of the LiteLLM YAML configuration file (a file path, despite the
# *_DIR suffix in the name).
LITELLM_CONFIG_DIR = f"{DATA_DIR}/litellm/config.yaml"

# Parse the configuration once; the handlers in this module keep the parsed
# copy on ``app.state.CONFIG`` and write it back to the same file.
with open(LITELLM_CONFIG_DIR, "r") as file:
    litellm_config = yaml.safe_load(file)

app.state.CONFIG = litellm_config
app.state.ENABLE = ENABLE_LITELLM
2024-04-21 08:46:09 +02:00
# Handle of the LiteLLM subprocess; None while no process has been started
# (or after it has been shut down).
background_process = None

# Environment variables that are removed from the child's environment:
#   PORT         - Uvicorn uses it, so LiteLLM might pick it up as well
#   DATABASE_URL - LiteLLM uses it for Prisma connections
CONFLICT_ENV_VARS = ["PORT", "DATABASE_URL"]
2024-04-21 08:22:02 +02:00
2024-04-21 08:46:09 +02:00
async def run_background_process(command):
    """Run *command* as a subprocess and relay its output to the log.

    The process handle is stored in the module-level ``background_process``
    so that ``shutdown_litellm_background`` can terminate it later. The
    coroutine only returns once the child process has exited.

    Args:
        command: Program name followed by its arguments; unpacked into
            ``asyncio.create_subprocess_exec`` (no shell involved).

    Raises:
        Exception: Any error raised while starting or monitoring the
            subprocess is logged and re-raised.
    """
    global background_process
    log.info("run_background_process")

    try:
        # Log the command to be executed
        log.info(f"Executing command: {command}")
        # Filter environment variables known to conflict with litellm
        env = {k: v for k, v in os.environ.items() if k not in CONFLICT_ENV_VARS}
        # Execute the command and create a subprocess
        process = await asyncio.create_subprocess_exec(
            *command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env
        )
        background_process = process
        log.info("Subprocess started successfully.")

        async def drain_stderr():
            # Relay STDERR in chunks as they arrive, for debugging purposes.
            while True:
                chunk = await process.stderr.read(65536)
                if not chunk:
                    break
                # A chunk may end in the middle of a multi-byte character,
                # so decode leniently instead of raising.
                stderr_text = chunk.decode(errors="replace").strip()
                if stderr_text:
                    log.info(f"Subprocess STDERR: {stderr_text}")

        async def drain_stdout():
            # Relay STDOUT line by line.
            async for line in process.stdout:
                log.info(line.decode().strip())

        # Both pipes must be consumed at the same time: reading one of them
        # to EOF first lets the other fill up, which blocks the child (and
        # therefore this coroutine) forever.
        await asyncio.gather(drain_stderr(), drain_stdout())

        # Wait for the process to finish
        returncode = await process.wait()
        log.info(f"Subprocess exited with return code {returncode}")
    except Exception as e:
        log.error(f"Failed to start subprocess: {e}")
        raise  # Propagate to whoever awaits this coroutine
2024-04-21 07:52:27 +02:00
async def start_litellm_background():
    """Build the ``litellm`` command line and run it in the background.

    The proxy is bound to ``LITELLM_PROXY_HOST``:``LITELLM_PROXY_PORT``, with
    telemetry switched off and ``LITELLM_CONFIG_DIR`` as its config file.
    Returns when ``run_background_process`` returns.
    """
    log.info("start_litellm_background")

    # Flag/value pairs, in the order they appear on the command line.
    options = {
        "--port": str(LITELLM_PROXY_PORT),
        "--host": LITELLM_PROXY_HOST,
        "--telemetry": "False",
        "--config": LITELLM_CONFIG_DIR,
    }

    command = ["litellm"]
    for flag, value in options.items():
        command += [flag, value]

    await run_background_process(command)
2024-03-08 22:33:56 +01:00
2024-04-21 08:46:09 +02:00
async def shutdown_litellm_background():
    """Terminate the LiteLLM subprocess, if there is one, and forget it.

    Does nothing when ``background_process`` is unset. A process that has
    already exited on its own is simply reaped and cleared rather than
    signalled.
    """
    global background_process
    log.info("shutdown_litellm_background")

    if background_process:
        try:
            # Signalling an asyncio subprocess that has already exited raises
            # ProcessLookupError; that must not abort the shutdown (and with
            # it every later restart).
            if background_process.returncode is None:
                background_process.terminate()
        except ProcessLookupError:
            # The process exited between the check and the signal.
            pass
        await background_process.wait()  # Ensure the process has terminated
        log.info("Subprocess terminated")
        background_process = None
2024-04-21 08:46:09 +02:00
2024-03-08 22:33:56 +01:00
@app.on_event("startup")
async def startup_event():
    """Startup hook: launch the LiteLLM proxy as a background task."""
    log.info("startup_event")
    # TODO: Check config.yaml file and create one
    # The event loop only keeps weak references to tasks, so hold a strong
    # one on the app state to keep the task from being garbage-collected
    # while it is still running.
    app.state.LITELLM_TASK = asyncio.create_task(start_litellm_background())
2024-03-08 22:33:56 +01:00
# Expose the model-filter configuration on the application state; the
# /models handler reads both values from there.
app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
app.state.ENABLE_MODEL_FILTER = ENABLE_MODEL_FILTER
2024-04-21 07:52:27 +02:00
@app.get("/")
async def get_status():
    """Status endpoint; always answers with ``status`` set to ``True``."""
    return dict(status=True)
2024-04-21 23:10:01 +02:00
async def restart_litellm():
    """Restart the litellm background service.

    Shuts down the current subprocess (if any) and schedules a new one. The
    new process is started by a background task, so this coroutine returns
    as soon as the task has been scheduled.

    Returns:
        A dict with ``status`` and ``message`` keys describing the outcome.

    Raises:
        HTTPException: With status 500 when shutting down or scheduling the
            service fails; ``detail`` carries the original error text.
    """
    log.info("Requested restart of litellm service.")
    try:
        # Shut down the existing process if it is running
        await shutdown_litellm_background()
        log.info("litellm service shutdown complete.")

        # Restart the background service. Keep a strong reference to the
        # task: the event loop itself only holds weak references.
        app.state.LITELLM_TASK = asyncio.create_task(start_litellm_background())
        log.info("litellm service restart complete.")

        return {
            "status": "success",
            "message": "litellm service restarted successfully.",
        }
    except Exception as e:
        # Record the failure as an error, including the traceback.
        log.exception(f"Error restarting litellm service: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
        )
2024-04-21 23:10:01 +02:00
@app.get("/restart")
async def restart_litellm_handler(user=Depends(get_admin_user)):
    """Restart the litellm service; guarded by the ``get_admin_user`` dependency."""
    outcome = await restart_litellm()
    return outcome
@app.get("/config")
async def get_config(user=Depends(get_admin_user)):
    """Return the LiteLLM configuration currently held on ``app.state``."""
    current_config = app.state.CONFIG
    return current_config
class LiteLLMConfigForm(BaseModel):
    """Request body of ``POST /config/update``; every section is optional."""

    # Empty protected namespaces -- presumably so that pydantic accepts the
    # ``model_``-prefixed field below without complaint.
    model_config = ConfigDict(protected_namespaces=())

    general_settings: Optional[dict] = None
    litellm_settings: Optional[dict] = None
    model_list: Optional[List[dict]] = None
    router_settings: Optional[dict] = None
2024-04-21 23:10:01 +02:00
@app.post("/config/update")
async def update_config(form_data: LiteLLMConfigForm, user=Depends(get_admin_user)):
    """Replace the whole LiteLLM configuration and restart the service.

    Unset sections of the form are dropped, the result becomes the new
    in-memory configuration, is written to ``LITELLM_CONFIG_DIR`` and is
    returned once the restart has been requested.
    """
    new_config = form_data.model_dump(exclude_none=True)
    app.state.CONFIG = new_config

    with open(LITELLM_CONFIG_DIR, "w") as config_file:
        yaml.dump(new_config, config_file)

    await restart_litellm()
    return app.state.CONFIG
2024-04-21 08:13:24 +02:00
@app.get("/models")
@app.get("/v1/models")
async def get_models(user=Depends(get_current_user)):
    """List the available models as a ``{"data": [...], "object": "list"}`` dict.

    When LiteLLM is disabled the list is empty. Otherwise the list is fetched
    from the local LiteLLM proxy; if that request fails for any reason, a
    list built from the ``model_list`` section of the in-memory configuration
    is returned instead.

    When model filtering is enabled, callers whose role is ``"user"`` only
    see models whose id is in ``MODEL_FILTER_LIST``. This applies to the
    fallback list as well.
    """
    if not app.state.ENABLE:
        return {
            "data": [],
            "object": "list",
        }

    def apply_model_filter(payload):
        # Restrict the listing for callers with the plain "user" role only.
        if app.state.ENABLE_MODEL_FILTER:
            if user and user.role == "user":
                payload["data"] = [
                    model
                    for model in payload["data"]
                    if model["id"] in app.state.MODEL_FILTER_LIST
                ]
        return payload

    # Wait until the subprocess handle has been published.
    # NOTE(review): there is no upper bound here; if the subprocess can never
    # be spawned this request waits indefinitely -- confirm that is intended.
    while not background_process:
        await asyncio.sleep(0.1)

    url = f"http://localhost:{LITELLM_PROXY_PORT}/v1"
    try:
        r = requests.request(method="GET", url=f"{url}/models")
        r.raise_for_status()
        return apply_model_filter(r.json())
    except Exception as e:
        log.exception(e)

        # Fall back to the models declared in the configuration. The config
        # may lack a "model_list" section (or be empty altogether).
        configured_models = (app.state.CONFIG or {}).get("model_list") or []
        return apply_model_filter(
            {
                "data": [
                    {
                        "id": model["model_name"],
                        "object": "model",
                        "created": int(time.time()),
                        "owned_by": "openai",
                    }
                    for model in configured_models
                ],
                "object": "list",
            }
        )
2024-04-21 07:52:27 +02:00
2024-04-22 00:26:22 +02:00
@app.get("/model/info")
async def get_model_list(user=Depends(get_admin_user)):
    """Return the ``model_list`` section of the in-memory configuration.

    A configuration without that section (possible after ``/config/update``,
    which drops unset sections) yields an empty list instead of an error.
    """
    configured_models = (app.state.CONFIG or {}).get("model_list") or []
    return {"data": configured_models}
class AddLiteLLMModelForm(BaseModel):
    """Request body of ``POST /model/new``: one ``model_list`` entry."""

    # Empty protected namespaces -- presumably so that pydantic accepts the
    # ``model_``-prefixed field below without complaint.
    model_config = ConfigDict(protected_namespaces=())

    model_name: str
    litellm_params: dict
2024-04-22 00:26:22 +02:00
@app.post("/model/new")
async def add_model_to_config(
    form_data: AddLiteLLMModelForm, user=Depends(get_admin_user)
):
    """Append a model to the configuration, persist it and restart litellm.

    ``get_llm_provider`` is called with the submitted model name first; if it
    raises, nothing is changed.

    Raises:
        HTTPException: With status 500 and the error text as ``detail`` when
            any step fails.
    """
    try:
        get_llm_provider(model=form_data.model_name)

        # The configuration may lack a "model_list" section (for instance
        # after /config/update, which drops unset sections).
        model_list = app.state.CONFIG.get("model_list") or []
        model_list.append(form_data.model_dump())
        app.state.CONFIG["model_list"] = model_list

        with open(LITELLM_CONFIG_DIR, "w") as file:
            yaml.dump(app.state.CONFIG, file)

        await restart_litellm()

        return {"message": MESSAGES.MODEL_ADDED(form_data.model_name)}
    except Exception as e:
        # Use the module logger (with traceback) like the other handlers.
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)
        )
2024-04-22 00:26:22 +02:00
class DeleteLiteLLMModelForm(BaseModel):
    """Request body of ``POST /model/delete``."""

    # Compared against each entry's "model_name" by the delete handler.
    id: str
@app.post("/model/delete")
async def delete_model_from_config(
    form_data: DeleteLiteLLMModelForm, user=Depends(get_admin_user)
):
    """Remove every model named ``form_data.id``, persist and restart litellm.

    A configuration without a ``model_list`` section is treated as an empty
    list, so the request succeeds instead of failing with a ``KeyError``.
    """
    current_models = app.state.CONFIG.get("model_list") or []
    app.state.CONFIG["model_list"] = [
        model for model in current_models if model["model_name"] != form_data.id
    ]

    with open(LITELLM_CONFIG_DIR, "w") as file:
        yaml.dump(app.state.CONFIG, file)

    await restart_litellm()

    return {"message": MESSAGES.MODEL_DELETED(form_data.id)}
2024-04-22 00:26:22 +02:00
2024-04-21 08:13:24 +02:00
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
    """Forward a request to the local LiteLLM proxy and relay its answer.

    The request body is sent unchanged to the same path on
    ``http://localhost:LITELLM_PROXY_PORT`` with a JSON content type. A
    ``text/event-stream`` response is streamed back; anything else is parsed
    as JSON and returned.

    Raises:
        HTTPException: When the upstream call fails. The upstream error
            status is preserved when there is one, otherwise 500 is used;
            ``detail`` carries the upstream error message when available.
    """
    body = await request.body()

    url = f"http://localhost:{LITELLM_PROXY_PORT}"
    target_url = f"{url}/{path}"

    # Only the content type is sent; no Authorization header is forwarded.
    headers = {"Content-Type": "application/json"}

    r = None

    try:
        r = requests.request(
            method=request.method,
            url=target_url,
            data=body,
            headers=headers,
            stream=True,
        )
        r.raise_for_status()

        # Check if response is SSE
        if "text/event-stream" in r.headers.get("Content-Type", ""):
            return StreamingResponse(
                r.iter_content(chunk_size=8192),
                status_code=r.status_code,
                headers=dict(r.headers),
            )
        return r.json()
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"
            except Exception:
                error_detail = f"External: {e}"

        # A Response is falsy for every 4xx/5xx status, so test for None
        # explicitly or the upstream status is lost. A successful upstream
        # status must not be re-raised as an error, so that case (e.g. an
        # unparsable body) is reported as 500.
        if r is not None and not r.ok:
            status_code = r.status_code
        else:
            status_code = 500

        raise HTTPException(status_code=status_code, detail=error_detail)