forked from open-webui/open-webui
fixes and updates
This commit is contained in:
commit
9bcb37ea10
6 changed files with 150 additions and 81 deletions
|
@ -28,6 +28,7 @@ from config import (
|
|||
UPLOAD_DIR,
|
||||
WHISPER_MODEL,
|
||||
WHISPER_MODEL_DIR,
|
||||
DEVICE_TYPE,
|
||||
)
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
@ -42,6 +43,10 @@ app.add_middleware(
|
|||
allow_headers=["*"],
|
||||
)
|
||||
|
||||
# setting device type for whisper model
|
||||
whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu"
|
||||
log.info(f"whisper_device_type: {whisper_device_type}")
|
||||
|
||||
|
||||
@app.post("/transcribe")
|
||||
def transcribe(
|
||||
|
@ -66,7 +71,7 @@ def transcribe(
|
|||
|
||||
model = WhisperModel(
|
||||
WHISPER_MODEL,
|
||||
device="auto",
|
||||
device=whisper_device_type,
|
||||
compute_type="int8",
|
||||
download_root=WHISPER_MODEL_DIR,
|
||||
)
|
||||
|
|
|
@ -59,7 +59,7 @@ from config import (
|
|||
UPLOAD_DIR,
|
||||
DOCS_DIR,
|
||||
RAG_EMBEDDING_MODEL,
|
||||
RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
||||
DEVICE_TYPE,
|
||||
CHROMA_CLIENT,
|
||||
CHUNK_SIZE,
|
||||
CHUNK_OVERLAP,
|
||||
|
@ -71,15 +71,6 @@ from constants import ERROR_MESSAGES
|
|||
log = logging.getLogger(__name__)
|
||||
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
||||
|
||||
#
|
||||
# if RAG_EMBEDDING_MODEL:
|
||||
# sentence_transformer_ef = SentenceTransformer(
|
||||
# model_name_or_path=RAG_EMBEDDING_MODEL,
|
||||
# cache_folder=RAG_EMBEDDING_MODEL_DIR,
|
||||
# device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
||||
# )
|
||||
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
app.state.PDF_EXTRACT_IMAGES = False
|
||||
|
@ -92,7 +83,7 @@ app.state.TOP_K = 4
|
|||
app.state.sentence_transformer_ef = (
|
||||
embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||
model_name=app.state.RAG_EMBEDDING_MODEL,
|
||||
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
||||
device=DEVICE_TYPE,
|
||||
)
|
||||
)
|
||||
|
||||
|
@ -147,10 +138,9 @@ async def update_embedding_model(
|
|||
app.state.sentence_transformer_ef = (
|
||||
embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||
model_name=app.state.RAG_EMBEDDING_MODEL,
|
||||
device=RAG_EMBEDDING_MODEL_DEVICE_TYPE,
|
||||
device=DEVICE_TYPE,
|
||||
)
|
||||
)
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
"embedding_model": app.state.RAG_EMBEDDING_MODEL,
|
||||
|
|
|
@ -253,6 +253,8 @@ OLLAMA_API_BASE_URL = os.environ.get(
|
|||
|
||||
OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "")
|
||||
K8S_FLAG = os.environ.get("K8S_FLAG", "")
|
||||
USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false")
|
||||
|
||||
|
||||
if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
|
||||
OLLAMA_BASE_URL = (
|
||||
|
@ -263,7 +265,12 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "":
|
|||
|
||||
if ENV == "prod":
|
||||
if OLLAMA_BASE_URL == "/ollama":
|
||||
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
|
||||
if USE_OLLAMA_DOCKER.lower() == "true":
|
||||
# if you use all-in-one docker container (Open WebUI + Ollama)
|
||||
# with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true") this only works with http://localhost:11434
|
||||
OLLAMA_BASE_URL = "http://localhost:11434"
|
||||
else:
|
||||
OLLAMA_BASE_URL = "http://host.docker.internal:11434"
|
||||
|
||||
elif K8S_FLAG:
|
||||
OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434"
|
||||
|
@ -384,10 +391,16 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
|
|||
CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
|
||||
# this uses the model defined in the Dockerfile ENV variable. If you don't use Docker or Docker-based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2)
|
||||
RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||
log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"),
|
||||
# device type for embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing the right one can lead to better performance
|
||||
RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get(
|
||||
"RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu"
|
||||
)
|
||||
USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false")
|
||||
|
||||
if USE_CUDA.lower() == "true":
|
||||
DEVICE_TYPE = "cuda"
|
||||
else:
|
||||
DEVICE_TYPE = "cpu"
|
||||
|
||||
|
||||
CHROMA_CLIENT = chromadb.PersistentClient(
|
||||
path=CHROMA_DATA_PATH,
|
||||
settings=Settings(allow_reset=True, anonymized_telemetry=False),
|
||||
|
|
|
@ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key
|
|||
|
||||
PORT="${PORT:-8080}"
|
||||
if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
|
||||
echo No WEBUI_SECRET_KEY provided
|
||||
echo "No WEBUI_SECRET_KEY provided"
|
||||
|
||||
if ! [ -e "$KEY_FILE" ]; then
|
||||
echo Generating WEBUI_SECRET_KEY
|
||||
echo "Generating WEBUI_SECRET_KEY"
|
||||
# Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one.
|
||||
echo $(head -c 12 /dev/random | base64) > $KEY_FILE
|
||||
echo $(head -c 12 /dev/random | base64) > "$KEY_FILE"
|
||||
fi
|
||||
|
||||
echo Loading WEBUI_SECRET_KEY from $KEY_FILE
|
||||
WEBUI_SECRET_KEY=`cat $KEY_FILE`
|
||||
echo "Loading WEBUI_SECRET_KEY from $KEY_FILE"
|
||||
WEBUI_SECRET_KEY=$(cat "$KEY_FILE")
|
||||
fi
|
||||
|
||||
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
|
||||
if [ "$USE_OLLAMA_DOCKER" = "true" ]; then
|
||||
echo "USE_OLLAMA is set to true, starting ollama serve."
|
||||
ollama serve &
|
||||
fi
|
||||
|
||||
if [ "$USE_CUDA_DOCKER" = "true" ]; then
|
||||
echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries."
|
||||
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cublas/lib"
|
||||
fi
|
||||
|
||||
WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*'
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue