Recreating the RAG collection is now optional and is only done for web requests

This commit is contained in:
Firat Birlik 2024-03-04 10:00:06 -06:00
parent 5d4ff85228
commit 6782e95c75

View file

@ -108,7 +108,7 @@ class StoreWebForm(CollectionNameForm):
url: str url: str
def store_data_in_vector_db(data, collection_name) -> bool: def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
text_splitter = RecursiveCharacterTextSplitter( text_splitter = RecursiveCharacterTextSplitter(
chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP
) )
@ -118,9 +118,11 @@ def store_data_in_vector_db(data, collection_name) -> bool:
metadatas = [doc.metadata for doc in docs] metadatas = [doc.metadata for doc in docs]
try: try:
for collection in CHROMA_CLIENT.list_collections(): if overwrite:
if collection_name == collection.name: for collection in CHROMA_CLIENT.list_collections():
CHROMA_CLIENT.delete_collection(name=collection_name) if collection_name == collection.name:
print(f"deleting existing collection {collection_name}")
CHROMA_CLIENT.delete_collection(name=collection_name)
collection = CHROMA_CLIENT.create_collection( collection = CHROMA_CLIENT.create_collection(
name=collection_name, name=collection_name,
@ -359,7 +361,7 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)):
if collection_name == "": if collection_name == "":
collection_name = calculate_sha256_string(form_data.url)[:63] collection_name = calculate_sha256_string(form_data.url)[:63]
store_data_in_vector_db(data, collection_name) store_data_in_vector_db(data, collection_name, overwrite=True)
return { return {
"status": True, "status": True,
"collection_name": collection_name, "collection_name": collection_name,