forked from open-webui/open-webui
feat: full integration
This commit is contained in:
parent
28c1192ac0
commit
9634e2da3e
6 changed files with 116 additions and 25 deletions
|
@ -9,6 +9,7 @@ from fastapi import (
|
||||||
Form,
|
Form,
|
||||||
)
|
)
|
||||||
from fastapi.middleware.cors import CORSMiddleware
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
import os, shutil
|
||||||
|
|
||||||
from chromadb.utils import embedding_functions
|
from chromadb.utils import embedding_functions
|
||||||
|
|
||||||
|
@ -23,7 +24,7 @@ from typing import Optional
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from config import EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP
|
from config import UPLOAD_DIR, EMBED_MODEL, CHROMA_CLIENT, CHUNK_SIZE, CHUNK_OVERLAP
|
||||||
from constants import ERROR_MESSAGES
|
from constants import ERROR_MESSAGES
|
||||||
|
|
||||||
EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction(
|
EMBEDDING_FUNC = embedding_functions.SentenceTransformerEmbeddingFunction(
|
||||||
|
@ -51,7 +52,7 @@ class StoreWebForm(CollectionNameForm):
|
||||||
url: str
|
url: str
|
||||||
|
|
||||||
|
|
||||||
def store_data_in_vector_db(data, collection_name):
|
def store_data_in_vector_db(data, collection_name) -> bool:
|
||||||
text_splitter = RecursiveCharacterTextSplitter(
|
text_splitter = RecursiveCharacterTextSplitter(
|
||||||
chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
|
chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP
|
||||||
)
|
)
|
||||||
|
@ -60,6 +61,7 @@ def store_data_in_vector_db(data, collection_name):
|
||||||
texts = [doc.page_content for doc in docs]
|
texts = [doc.page_content for doc in docs]
|
||||||
metadatas = [doc.metadata for doc in docs]
|
metadatas = [doc.metadata for doc in docs]
|
||||||
|
|
||||||
|
try:
|
||||||
collection = CHROMA_CLIENT.create_collection(
|
collection = CHROMA_CLIENT.create_collection(
|
||||||
name=collection_name, embedding_function=EMBEDDING_FUNC
|
name=collection_name, embedding_function=EMBEDDING_FUNC
|
||||||
)
|
)
|
||||||
|
@ -67,6 +69,14 @@ def store_data_in_vector_db(data, collection_name):
|
||||||
collection.add(
|
collection.add(
|
||||||
documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts]
|
documents=texts, metadatas=metadatas, ids=[str(uuid.uuid1()) for _ in texts]
|
||||||
)
|
)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
print(e.__class__.__name__)
|
||||||
|
if e.__class__.__name__ == "UniqueConstraintError":
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
|
@app.get("/")
|
||||||
|
@ -116,7 +126,7 @@ def store_doc(collection_name: str = Form(...), file: UploadFile = File(...)):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
filename = file.filename
|
filename = file.filename
|
||||||
file_path = f"./data/{filename}"
|
file_path = f"{UPLOAD_DIR}/{filename}"
|
||||||
contents = file.file.read()
|
contents = file.file.read()
|
||||||
with open(file_path, "wb") as f:
|
with open(file_path, "wb") as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
|
@ -128,8 +138,15 @@ def store_doc(collection_name: str = Form(...), file: UploadFile = File(...)):
|
||||||
loader = TextLoader(file_path)
|
loader = TextLoader(file_path)
|
||||||
|
|
||||||
data = loader.load()
|
data = loader.load()
|
||||||
store_data_in_vector_db(data, collection_name)
|
result = store_data_in_vector_db(data, collection_name)
|
||||||
|
|
||||||
|
if result:
|
||||||
return {"status": True, "collection_name": collection_name}
|
return {"status": True, "collection_name": collection_name}
|
||||||
|
else:
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
|
detail=ERROR_MESSAGES.DEFAULT(),
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
print(e)
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
|
@ -138,6 +155,27 @@ def store_doc(collection_name: str = Form(...), file: UploadFile = File(...)):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/reset/db")
|
||||||
def reset_vector_db():
|
def reset_vector_db():
|
||||||
CHROMA_CLIENT.reset()
|
CHROMA_CLIENT.reset()
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/reset")
|
||||||
|
def reset():
|
||||||
|
folder = f"{UPLOAD_DIR}"
|
||||||
|
for filename in os.listdir(folder):
|
||||||
|
file_path = os.path.join(folder, filename)
|
||||||
|
try:
|
||||||
|
if os.path.isfile(file_path) or os.path.islink(file_path):
|
||||||
|
os.unlink(file_path)
|
||||||
|
elif os.path.isdir(file_path):
|
||||||
|
shutil.rmtree(file_path)
|
||||||
|
except Exception as e:
|
||||||
|
print("Failed to delete %s. Reason: %s" % (file_path, e))
|
||||||
|
|
||||||
|
try:
|
||||||
|
CHROMA_CLIENT.reset()
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
|
||||||
return {"status": True}
|
return {"status": True}
|
||||||
|
|
|
@ -1,14 +1,31 @@
|
||||||
from dotenv import load_dotenv, find_dotenv
|
from dotenv import load_dotenv, find_dotenv
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
import chromadb
|
import chromadb
|
||||||
|
from chromadb import Settings
|
||||||
|
|
||||||
|
|
||||||
from secrets import token_bytes
|
from secrets import token_bytes
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
|
||||||
from constants import ERROR_MESSAGES
|
from constants import ERROR_MESSAGES
|
||||||
|
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
load_dotenv(find_dotenv("../.env"))
|
load_dotenv(find_dotenv("../.env"))
|
||||||
|
|
||||||
|
|
||||||
|
####################################
|
||||||
|
# File Upload
|
||||||
|
####################################
|
||||||
|
|
||||||
|
|
||||||
|
UPLOAD_DIR = "./data/uploads"
|
||||||
|
Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
####################################
|
####################################
|
||||||
# ENV (dev,test,prod)
|
# ENV (dev,test,prod)
|
||||||
####################################
|
####################################
|
||||||
|
@ -64,6 +81,8 @@ if WEBUI_AUTH and WEBUI_JWT_SECRET_KEY == "":
|
||||||
|
|
||||||
CHROMA_DATA_PATH = "./data/vector_db"
|
CHROMA_DATA_PATH = "./data/vector_db"
|
||||||
EMBED_MODEL = "all-MiniLM-L6-v2"
|
EMBED_MODEL = "all-MiniLM-L6-v2"
|
||||||
CHROMA_CLIENT = chromadb.PersistentClient(path=CHROMA_DATA_PATH)
|
CHROMA_CLIENT = chromadb.PersistentClient(
|
||||||
|
path=CHROMA_DATA_PATH, settings=Settings(allow_reset=True)
|
||||||
|
)
|
||||||
CHUNK_SIZE = 1500
|
CHUNK_SIZE = 1500
|
||||||
CHUNK_OVERLAP = 100
|
CHUNK_OVERLAP = 100
|
||||||
|
|
|
@ -124,16 +124,16 @@
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
||||||
console.log(file);
|
console.log(file);
|
||||||
const hash = await calculateSHA256(file);
|
const hash = (await calculateSHA256(file)).substring(0, 63);
|
||||||
// const res = uploadDocToVectorDB(localStorage.token, hash,file);
|
const res = await uploadDocToVectorDB(localStorage.token, hash, file);
|
||||||
|
|
||||||
if (true) {
|
if (res) {
|
||||||
files = [
|
files = [
|
||||||
...files,
|
...files,
|
||||||
{
|
{
|
||||||
type: 'doc',
|
type: 'doc',
|
||||||
name: file.name,
|
name: file.name,
|
||||||
collection_name: hash
|
collection_name: res.collection_name
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
@ -243,16 +243,16 @@
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
||||||
console.log(file);
|
console.log(file);
|
||||||
const hash = await calculateSHA256(file);
|
const hash = (await calculateSHA256(file)).substring(0, 63);
|
||||||
// const res = uploadDocToVectorDB(localStorage.token,hash,file);
|
const res = await uploadDocToVectorDB(localStorage.token, hash, file);
|
||||||
|
|
||||||
if (true) {
|
if (res) {
|
||||||
files = [
|
files = [
|
||||||
...files,
|
...files,
|
||||||
{
|
{
|
||||||
type: 'doc',
|
type: 'doc',
|
||||||
name: file.name,
|
name: file.name,
|
||||||
collection_name: hash
|
collection_name: res.collection_name
|
||||||
}
|
}
|
||||||
];
|
];
|
||||||
filesInputElement.value = '';
|
filesInputElement.value = '';
|
||||||
|
@ -280,7 +280,7 @@
|
||||||
<img src={file.url} alt="input" class=" h-16 w-16 rounded-xl object-cover" />
|
<img src={file.url} alt="input" class=" h-16 w-16 rounded-xl object-cover" />
|
||||||
{:else if file.type === 'doc'}
|
{:else if file.type === 'doc'}
|
||||||
<div
|
<div
|
||||||
class="h-16 w-[15rem] flex items-center space-x-3 px-2 bg-gray-600 rounded-xl"
|
class="h-16 w-[15rem] flex items-center space-x-3 px-2.5 bg-gray-600 rounded-xl"
|
||||||
>
|
>
|
||||||
<div class="p-2.5 bg-red-400 rounded-lg">
|
<div class="p-2.5 bg-red-400 rounded-lg">
|
||||||
<svg
|
<svg
|
||||||
|
|
|
@ -53,11 +53,41 @@
|
||||||
class="prose chat-{message.role} w-full max-w-full dark:prose-invert prose-headings:my-0 prose-p:my-0 prose-p:-mb-4 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-img:my-0 prose-ul:-my-4 prose-ol:-my-4 prose-li:-my-3 prose-ul:-mb-6 prose-ol:-mb-6 prose-li:-mb-4 whitespace-pre-line"
|
class="prose chat-{message.role} w-full max-w-full dark:prose-invert prose-headings:my-0 prose-p:my-0 prose-p:-mb-4 prose-pre:my-0 prose-table:my-0 prose-blockquote:my-0 prose-img:my-0 prose-ul:-my-4 prose-ol:-my-4 prose-li:-my-3 prose-ul:-mb-6 prose-ol:-mb-6 prose-li:-mb-4 whitespace-pre-line"
|
||||||
>
|
>
|
||||||
{#if message.files}
|
{#if message.files}
|
||||||
<div class="my-3 w-full flex overflow-x-auto space-x-2">
|
<div class="my-2.5 w-full flex overflow-x-auto space-x-2 flex-wrap">
|
||||||
{#each message.files as file}
|
{#each message.files as file}
|
||||||
<div>
|
<div>
|
||||||
{#if file.type === 'image'}
|
{#if file.type === 'image'}
|
||||||
<img src={file.url} alt="input" class=" max-h-96 rounded-lg" draggable="false" />
|
<img src={file.url} alt="input" class=" max-h-96 rounded-lg" draggable="false" />
|
||||||
|
{:else if file.type === 'doc'}
|
||||||
|
<div
|
||||||
|
class="h-16 w-[15rem] flex items-center space-x-3 px-2.5 bg-gray-600 rounded-xl"
|
||||||
|
>
|
||||||
|
<div class="p-2.5 bg-red-400 rounded-lg">
|
||||||
|
<svg
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
viewBox="0 0 24 24"
|
||||||
|
fill="currentColor"
|
||||||
|
class="w-6 h-6"
|
||||||
|
>
|
||||||
|
<path
|
||||||
|
fill-rule="evenodd"
|
||||||
|
d="M5.625 1.5c-1.036 0-1.875.84-1.875 1.875v17.25c0 1.035.84 1.875 1.875 1.875h12.75c1.035 0 1.875-.84 1.875-1.875V12.75A3.75 3.75 0 0 0 16.5 9h-1.875a1.875 1.875 0 0 1-1.875-1.875V5.25A3.75 3.75 0 0 0 9 1.5H5.625ZM7.5 15a.75.75 0 0 1 .75-.75h7.5a.75.75 0 0 1 0 1.5h-7.5A.75.75 0 0 1 7.5 15Zm.75 2.25a.75.75 0 0 0 0 1.5H12a.75.75 0 0 0 0-1.5H8.25Z"
|
||||||
|
clip-rule="evenodd"
|
||||||
|
/>
|
||||||
|
<path
|
||||||
|
d="M12.971 1.816A5.23 5.23 0 0 1 14.25 5.25v1.875c0 .207.168.375.375.375H16.5a5.23 5.23 0 0 1 3.434 1.279 9.768 9.768 0 0 0-6.963-6.963Z"
|
||||||
|
/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="flex flex-col justify-center -space-y-0.5">
|
||||||
|
<div class=" text-gray-100 text-sm line-clamp-1">
|
||||||
|
{file.name}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class=" text-gray-500 text-sm">Document</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
{/each}
|
{/each}
|
||||||
|
|
|
@ -129,7 +129,6 @@ export const findWordIndices = (text) => {
|
||||||
};
|
};
|
||||||
|
|
||||||
export const calculateSHA256 = async (file) => {
|
export const calculateSHA256 = async (file) => {
|
||||||
console.log(file);
|
|
||||||
// Create a FileReader to read the file asynchronously
|
// Create a FileReader to read the file asynchronously
|
||||||
const reader = new FileReader();
|
const reader = new FileReader();
|
||||||
|
|
||||||
|
@ -156,7 +155,7 @@ export const calculateSHA256 = async (file) => {
|
||||||
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
const hashArray = Array.from(new Uint8Array(hashBuffer));
|
||||||
const hashHex = hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join('');
|
const hashHex = hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join('');
|
||||||
|
|
||||||
return `sha256:${hashHex}`;
|
return `${hashHex}`;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Error calculating SHA-256 hash:', error);
|
console.error('Error calculating SHA-256 hash:', error);
|
||||||
throw error;
|
throw error;
|
||||||
|
|
|
@ -186,8 +186,11 @@
|
||||||
const _chatId = JSON.parse(JSON.stringify($chatId));
|
const _chatId = JSON.parse(JSON.stringify($chatId));
|
||||||
|
|
||||||
// TODO: update below to include all ancestral files
|
// TODO: update below to include all ancestral files
|
||||||
const docs = history.messages[parentId].files.filter((item) => item.type === 'file');
|
|
||||||
|
|
||||||
|
console.log(history.messages[parentId]);
|
||||||
|
const docs = history.messages[parentId]?.files?.filter((item) => item.type === 'doc') ?? [];
|
||||||
|
|
||||||
|
console.log(docs);
|
||||||
if (docs.length > 0) {
|
if (docs.length > 0) {
|
||||||
const query = history.messages[parentId].content;
|
const query = history.messages[parentId].content;
|
||||||
|
|
||||||
|
@ -207,6 +210,8 @@
|
||||||
return `${a}${context.documents.join(' ')}\n`;
|
return `${a}${context.documents.join(' ')}\n`;
|
||||||
}, '');
|
}, '');
|
||||||
|
|
||||||
|
console.log(contextString);
|
||||||
|
|
||||||
history.messages[parentId].raContent = RAGTemplate(contextString, query);
|
history.messages[parentId].raContent = RAGTemplate(contextString, query);
|
||||||
history.messages[parentId].contexts = relevantContexts;
|
history.messages[parentId].contexts = relevantContexts;
|
||||||
await tick();
|
await tick();
|
||||||
|
|
Loading…
Reference in a new issue