From 5d4ff852281cf18dac9955442b60737e25447257 Mon Sep 17 00:00:00 2001 From: Firat Birlik Date: Sun, 3 Mar 2024 21:25:00 -0600 Subject: [PATCH 1/2] recreate rag collection instead of falling back to stale version --- backend/apps/rag/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 2a8b2a49..0a5fde2c 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -118,6 +118,10 @@ def store_data_in_vector_db(data, collection_name) -> bool: metadatas = [doc.metadata for doc in docs] try: + for collection in CHROMA_CLIENT.list_collections(): + if collection_name == collection.name: + CHROMA_CLIENT.delete_collection(name=collection_name) + collection = CHROMA_CLIENT.create_collection( name=collection_name, embedding_function=app.state.sentence_transformer_ef, From 6782e95c7584defd990bb01cf2732f721038d190 Mon Sep 17 00:00:00 2001 From: Firat Birlik Date: Mon, 4 Mar 2024 10:00:06 -0600 Subject: [PATCH 2/2] recreate rag collection is now optional and only used for web requests --- backend/apps/rag/main.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 0a5fde2c..99aa6959 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -108,7 +108,7 @@ class StoreWebForm(CollectionNameForm): url: str -def store_data_in_vector_db(data, collection_name) -> bool: +def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool: text_splitter = RecursiveCharacterTextSplitter( chunk_size=app.state.CHUNK_SIZE, chunk_overlap=app.state.CHUNK_OVERLAP ) @@ -118,9 +118,11 @@ def store_data_in_vector_db(data, collection_name) -> bool: metadatas = [doc.metadata for doc in docs] try: - for collection in CHROMA_CLIENT.list_collections(): - if collection_name == collection.name: - CHROMA_CLIENT.delete_collection(name=collection_name) + if overwrite: + for collection in CHROMA_CLIENT.list_collections(): + if collection_name == collection.name: + print(f"deleting existing collection {collection_name}") + CHROMA_CLIENT.delete_collection(name=collection_name) collection = CHROMA_CLIENT.create_collection( name=collection_name, @@ -359,7 +361,7 @@ def store_web(form_data: StoreWebForm, user=Depends(get_current_user)): if collection_name == "": collection_name = calculate_sha256_string(form_data.url)[:63] - store_data_in_vector_db(data, collection_name) + store_data_in_vector_db(data, collection_name, overwrite=True) return { "status": True, "collection_name": collection_name,