Merge Updates & Dockerfile improvements

lainedfles 2024-04-02 03:25:20 -06:00 committed by GitHub
parent fdef2abdfb
commit 9763d885be
155 changed files with 14509 additions and 4803 deletions

@@ -1,24 +1,49 @@
from fastapi import FastAPI, Request, Response, HTTPException, Depends, status
from fastapi import (
FastAPI,
Request,
Response,
HTTPException,
Depends,
status,
UploadFile,
File,
BackgroundTasks,
)
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel, ConfigDict
import os
import copy
import random
import requests
import json
import uuid
import aiohttp
import asyncio
import logging
from urllib.parse import urlparse
from typing import Optional, List, Union
from apps.web.models.users import Users
from constants import ERROR_MESSAGES
from utils.utils import decode_token, get_current_user, get_admin_user
from config import OLLAMA_BASE_URLS, MODEL_FILTER_ENABLED, MODEL_FILTER_LIST
from typing import Optional, List, Union
from config import (
SRC_LOG_LEVELS,
OLLAMA_BASE_URLS,
MODEL_FILTER_ENABLED,
MODEL_FILTER_LIST,
UPLOAD_DIR,
)
from utils.misc import calculate_sha256
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["OLLAMA"])
app = FastAPI()
app.add_middleware(
@@ -69,7 +94,7 @@ class UrlUpdateForm(BaseModel):
async def update_ollama_api_url(form_data: UrlUpdateForm, user=Depends(get_admin_user)):
app.state.OLLAMA_BASE_URLS = form_data.urls
print(app.state.OLLAMA_BASE_URLS)
log.info(f"app.state.OLLAMA_BASE_URLS: {app.state.OLLAMA_BASE_URLS}")
return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}
@@ -90,7 +115,7 @@ async def fetch_url(url):
return await response.json()
except Exception as e:
# Handle connection error here
print(f"Connection error: {e}")
log.error(f"Connection error: {e}")
return None
@@ -98,13 +123,14 @@ def merge_models_lists(model_lists):
merged_models = {}
for idx, model_list in enumerate(model_lists):
for model in model_list:
digest = model["digest"]
if digest not in merged_models:
model["urls"] = [idx]
merged_models[digest] = model
else:
merged_models[digest]["urls"].append(idx)
if model_list is not None:
for model in model_list:
digest = model["digest"]
if digest not in merged_models:
model["urls"] = [idx]
merged_models[digest] = model
else:
merged_models[digest]["urls"].append(idx)
return list(merged_models.values())
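The added None-guard matters because a model list is None for any host that was unreachable; enumerate() still advances idx past the dead entry, so surviving hosts keep their correct index in urls. A worked example with a hypothetical digest:

    # Hosts 0 and 2 serve the same model; host 1 was unreachable (None).
    lists = [
        [{"digest": "sha256:abc", "model": "llama2:latest"}],
        None,
        [{"digest": "sha256:abc", "model": "llama2:latest"}],
    ]
    # merge_models_lists(lists) ->
    # [{"digest": "sha256:abc", "model": "llama2:latest", "urls": [0, 2]}]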
@@ -113,16 +139,16 @@ def merge_models_lists(model_lists):
async def get_all_models():
print("get_all_models")
log.info("get_all_models()")
tasks = [fetch_url(f"{url}/api/tags") for url in app.state.OLLAMA_BASE_URLS]
responses = await asyncio.gather(*tasks)
responses = list(filter(lambda x: x is not None, responses))
models = {
"models": merge_models_lists(
map(lambda response: response["models"], responses)
map(lambda response: response["models"] if response else None, responses)
)
}
app.state.MODELS = {model["model"]: model for model in models["models"]}
return models
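Taken together, get_all_models is now tolerant of dead hosts end to end: fetch_url returns None on a connection error, the filter() drops those entries, and the guarded map() keeps merge_models_lists from indexing into None. The same pattern in isolation (the fetch is stubbed; URLs are hypothetical):

    import asyncio

    async def fetch_url_sketch(url):
        # Stand-in for the aiohttp fetch above: None means the host is down.
        return {"models": []} if "live" in url else None

    async def demo():
        urls = ["http://live-host:11434", "http://dead-host:11434"]
        responses = await asyncio.gather(*(fetch_url_sketch(u) for u in urls))
        return [r for r in responses if r is not None]  # only reachable hosts survive

    asyncio.run(demo())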
@@ -154,7 +180,7 @@ async def get_ollama_tags(
return r.json()
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -181,11 +207,17 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
responses = await asyncio.gather(*tasks)
responses = list(filter(lambda x: x is not None, responses))
lowest_version = min(
responses, key=lambda x: tuple(map(int, x["version"].split(".")))
)
if len(responses) > 0:
lowest_version = min(
responses, key=lambda x: tuple(map(int, x["version"].split(".")))
)
return {"version": lowest_version["version"]}
return {"version": lowest_version["version"]}
else:
raise HTTPException(
status_code=500,
detail=ERROR_MESSAGES.OLLAMA_NOT_FOUND,
)
else:
url = app.state.OLLAMA_BASE_URLS[url_idx]
try:
@@ -194,7 +226,7 @@ async def get_ollama_versions(url_idx: Optional[int] = None):
return r.json()
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -220,18 +252,33 @@ async def pull_model(
form_data: ModelNameForm, url_idx: int = 0, user=Depends(get_admin_user)
):
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
r = None
def get_request():
nonlocal url
nonlocal r
request_id = str(uuid.uuid4())
try:
REQUEST_POOL.append(request_id)
def stream_content():
for chunk in r.iter_content(chunk_size=8192):
yield chunk
try:
yield json.dumps({"id": request_id, "done": False}) + "\n"
for chunk in r.iter_content(chunk_size=8192):
if request_id in REQUEST_POOL:
yield chunk
else:
log.warning("User: canceled request")
break
finally:
if hasattr(r, "close"):
r.close()
if request_id in REQUEST_POOL:
REQUEST_POOL.remove(request_id)
r = requests.request(
method="POST",
@@ -252,8 +299,9 @@ async def pull_model(
try:
return await run_in_threadpool(get_request)
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -292,7 +340,7 @@ async def push_model(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.debug(f"url: {url}")
r = None
@@ -324,7 +372,7 @@ async def push_model(
try:
return await run_in_threadpool(get_request)
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -352,9 +400,9 @@ class CreateModelForm(BaseModel):
async def create_model(
form_data: CreateModelForm, url_idx: int = 0, user=Depends(get_admin_user)
):
print(form_data)
log.debug(f"form_data: {form_data}")
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
r = None
@@ -376,7 +424,7 @@ async def create_model(
r.raise_for_status()
print(r)
log.debug(f"r: {r}")
return StreamingResponse(
stream_content(),
@@ -389,7 +437,7 @@ async def create_model(
try:
return await run_in_threadpool(get_request)
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -427,7 +475,7 @@ async def copy_model(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
try:
r = requests.request(
@@ -437,11 +485,11 @@ async def copy_model(
)
r.raise_for_status()
print(r.text)
log.debug(f"r.text: {r.text}")
return True
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -474,7 +522,7 @@ async def delete_model(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
try:
r = requests.request(
@@ -484,11 +532,11 @@ async def delete_model(
)
r.raise_for_status()
print(r.text)
log.debug(f"r.text: {r.text}")
return True
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -514,7 +562,7 @@ async def show_model_info(form_data: ModelNameForm, user=Depends(get_current_use
url_idx = random.choice(app.state.MODELS[form_data.name]["urls"])
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
try:
r = requests.request(
@@ -526,7 +574,7 @@ async def show_model_info(form_data: ModelNameForm, user=Depends(get_current_use
return r.json()
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -566,7 +614,7 @@ async def generate_embeddings(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
try:
r = requests.request(
@@ -578,7 +626,7 @@ async def generate_embeddings(
return r.json()
except Exception as e:
print(e)
log.exception(e)
error_detail = "Open WebUI: Server Connection Error"
if r is not None:
try:
@@ -622,11 +670,11 @@ async def generate_completion(
else:
raise HTTPException(
status_code=400,
detail="error_detail",
detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model),
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
r = None
@@ -647,7 +695,7 @@ async def generate_completion(
if request_id in REQUEST_POOL:
yield chunk
else:
print("User: canceled request")
log.warning("User: canceled request")
break
finally:
if hasattr(r, "close"):
@@ -702,7 +750,7 @@ class GenerateChatCompletionForm(BaseModel):
format: Optional[str] = None
options: Optional[dict] = None
template: Optional[str] = None
stream: Optional[bool] = True
stream: Optional[bool] = None
keep_alive: Optional[Union[int, str]] = None
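The stream default changing from True to None looks minor but likely interacts with serialization: the request body is built with model_dump_json(exclude_none=True), so an unset stream is now omitted from the payload and the Ollama server applies its own default, rather than the proxy always forcing stream=true. A quick pydantic v2 illustration:

    from typing import Optional
    from pydantic import BaseModel

    class Form(BaseModel):
        model: str
        stream: Optional[bool] = None  # unset -> dropped by exclude_none

    print(Form(model="llama2").model_dump_json(exclude_none=True))
    # {"model":"llama2"}  -- stream omitted; the backend default applies
    print(Form(model="llama2", stream=False).model_dump_json(exclude_none=True))
    # {"model":"llama2","stream":false}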
@@ -724,11 +772,15 @@ async def generate_chat_completion(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
r = None
print(form_data.model_dump_json(exclude_none=True).encode())
log.debug(
"form_data.model_dump_json(exclude_none=True).encode(): {0} ".format(
form_data.model_dump_json(exclude_none=True).encode()
)
)
def get_request():
nonlocal form_data
@@ -747,7 +799,7 @@ async def generate_chat_completion(
if request_id in REQUEST_POOL:
yield chunk
else:
print("User: canceled request")
log.warning("User: canceled request")
break
finally:
if hasattr(r, "close"):
@@ -770,7 +822,7 @@ async def generate_chat_completion(
headers=dict(r.headers),
)
except Exception as e:
print(e)
log.exception(e)
raise e
try:
@@ -824,7 +876,7 @@ async def generate_openai_chat_completion(
)
url = app.state.OLLAMA_BASE_URLS[url_idx]
print(url)
log.info(f"url: {url}")
r = None
@@ -847,7 +899,7 @@ async def generate_openai_chat_completion(
if request_id in REQUEST_POOL:
yield chunk
else:
print("User: canceled request")
log.warning("User: canceled request")
break
finally:
if hasattr(r, "close"):
@@ -890,6 +942,220 @@ async def generate_openai_chat_completion(
)
class UrlForm(BaseModel):
url: str
class UploadBlobForm(BaseModel):
filename: str
def parse_huggingface_url(hf_url):
try:
# Parse the URL
parsed_url = urlparse(hf_url)
# Get the path and split it into components
path_components = parsed_url.path.split("/")
# Extract the desired output
user_repo = "/".join(path_components[1:3])
model_file = path_components[-1]
return model_file
except ValueError:
return None
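Note that parse_huggingface_url assembles user_repo but only returns the final path component, which later becomes the local file name under UPLOAD_DIR. For the sample URL further down this file:

    from urllib.parse import urlparse

    hf_url = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
    parts = urlparse(hf_url).path.split("/")
    # parts[1:3] -> ['TheBloke', 'stablelm-zephyr-3b-GGUF']  (computed but currently unused)
    # parts[-1]  -> 'stablelm-zephyr-3b.Q2_K.gguf'           (the returned file name)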
async def download_file_stream(
ollama_url, file_url, file_path, file_name, chunk_size=1024 * 1024
):
done = False
if os.path.exists(file_path):
current_size = os.path.getsize(file_path)
else:
current_size = 0
headers = {"Range": f"bytes={current_size}-"} if current_size > 0 else {}
timeout = aiohttp.ClientTimeout(total=600) # Set the timeout
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get(file_url, headers=headers) as response:
total_size = int(response.headers.get("content-length", 0)) + current_size
with open(file_path, "ab+") as file:
async for data in response.content.iter_chunked(chunk_size):
current_size += len(data)
file.write(data)
done = current_size == total_size
progress = round((current_size / total_size) * 100, 2)
yield f'data: {{"progress": {progress}, "completed": {current_size}, "total": {total_size}}}\n\n'
if done:
file.seek(0)
hashed = calculate_sha256(file)
file.seek(0)
url = f"{ollama_url}/api/blobs/sha256:{hashed}"
response = requests.post(url, data=file)
if response.ok:
res = {
"done": done,
"blob": f"sha256:{hashed}",
"name": file_name,
}
os.remove(file_path)
yield f"data: {json.dumps(res)}\n\n"
else:
raise "Ollama: Could not create blob, Please try again."
# def number_generator():
# for i in range(1, 101):
# yield f"data: {i}\n"
# url = "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q2_K.gguf"
@app.post("/models/download")
@app.post("/models/download/{url_idx}")
async def download_model(
form_data: UrlForm,
url_idx: Optional[int] = None,
):
allowed_hosts = ["https://huggingface.co/", "https://github.com/"]
if not any(form_data.url.startswith(host) for host in allowed_hosts):
raise HTTPException(
status_code=400,
detail="Invalid file_url. Only URLs from allowed hosts are permitted.",
)
if url_idx is None:
url_idx = 0
url = app.state.OLLAMA_BASE_URLS[url_idx]
file_name = parse_huggingface_url(form_data.url)
if file_name:
file_path = f"{UPLOAD_DIR}/{file_name}"
return StreamingResponse(
download_file_stream(url, form_data.url, file_path, file_name),
)
else:
return None
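The allowlist is a plain prefix check, but the trailing slash on each entry does real work: startswith against "https://huggingface.co/" rejects lookalike hosts whose names merely begin with the allowed domain. A small check, with a hypothetical malicious URL:

    allowed_hosts = ["https://huggingface.co/", "https://github.com/"]

    def is_allowed(url):
        return any(url.startswith(host) for host in allowed_hosts)

    assert is_allowed("https://huggingface.co/TheBloke/m/resolve/main/m.gguf")
    assert not is_allowed("https://huggingface.co.evil.example/m.gguf")  # hypothetical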
@app.post("/models/upload")
@app.post("/models/upload/{url_idx}")
def upload_model(file: UploadFile = File(...), url_idx: Optional[int] = None):
if url_idx is None:
url_idx = 0
ollama_url = app.state.OLLAMA_BASE_URLS[url_idx]
file_path = f"{UPLOAD_DIR}/{file.filename}"
# Save file in chunks
with open(file_path, "wb+") as f:
for chunk in file.file:
f.write(chunk)
def file_process_stream():
nonlocal ollama_url
total_size = os.path.getsize(file_path)
chunk_size = 1024 * 1024
try:
with open(file_path, "rb") as f:
total = 0
done = False
while not done:
chunk = f.read(chunk_size)
if not chunk:
done = True
continue
total += len(chunk)
progress = round((total / total_size) * 100, 2)
res = {
"progress": progress,
"total": total_size,
"completed": total,
}
yield f"data: {json.dumps(res)}\n\n"
if done:
f.seek(0)
hashed = calculate_sha256(f)
f.seek(0)
url = f"{ollama_url}/api/blobs/sha256:{hashed}"
response = requests.post(url, data=f)
if response.ok:
res = {
"done": done,
"blob": f"sha256:{hashed}",
"name": file.filename,
}
os.remove(file_path)
yield f"data: {json.dumps(res)}\n\n"
else:
raise Exception(
"Ollama: Could not create blob, Please try again."
)
except Exception as e:
res = {"error": str(e)}
yield f"data: {json.dumps(res)}\n\n"
return StreamingResponse(file_process_stream(), media_type="text/event-stream")
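upload_model streams progress back as server-sent events, one data: line per chunk read, with the final event carrying the blob digest that Ollama stores. A hypothetical client loop (the endpoint URL and file name are illustrative, not taken from this commit):

    import json
    import requests

    with open("model.gguf", "rb") as f:
        resp = requests.post(
            "http://localhost:8080/ollama/models/upload",  # hypothetical deployment URL
            files={"file": f},
            stream=True,
        )
        for line in resp.iter_lines():
            if line.startswith(b"data: "):
                event = json.loads(line[len(b"data: "):])
                print(event.get("progress"), event.get("blob"))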
# async def upload_model(file: UploadFile = File(), url_idx: Optional[int] = None):
# if url_idx == None:
# url_idx = 0
# url = app.state.OLLAMA_BASE_URLS[url_idx]
# file_location = os.path.join(UPLOAD_DIR, file.filename)
# total_size = file.size
# async def file_upload_generator(file):
# print(file)
# try:
# async with aiofiles.open(file_location, "wb") as f:
# completed_size = 0
# while True:
# chunk = await file.read(1024*1024)
# if not chunk:
# break
# await f.write(chunk)
# completed_size += len(chunk)
# progress = (completed_size / total_size) * 100
# print(progress)
# yield f'data: {json.dumps({"status": "uploading", "percentage": progress, "total": total_size, "completed": completed_size, "done": False})}\n'
# except Exception as e:
# print(e)
# yield f"data: {json.dumps({'status': 'error', 'message': str(e)})}\n"
# finally:
# await file.close()
# print("done")
# yield f'data: {json.dumps({"status": "completed", "percentage": 100, "total": total_size, "completed": completed_size, "done": True})}\n'
# return StreamingResponse(
# file_upload_generator(copy.deepcopy(file)), media_type="text/event-stream"
# )
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def deprecated_proxy(path: str, request: Request, user=Depends(get_current_user)):
url = app.state.OLLAMA_BASE_URLS[0]
@@ -940,7 +1206,7 @@ async def deprecated_proxy(path: str, request: Request, user=Depends(get_current
if request_id in REQUEST_POOL:
yield chunk
else:
print("User: canceled request")
log.warning("User: canceled request")
break
finally:
if hasattr(r, "close"):