From d4b2578f6e6b4676cb301d474446a02e78240aa7 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Sun, 7 Jan 2024 09:05:52 -0800
Subject: [PATCH] feat: rag csv support

---
 backend/apps/rag/main.py                    | 11 +++++++++--
 src/lib/components/chat/MessageInput.svelte |  6 ++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index b3844d69..1c84241e 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -13,7 +13,12 @@ import os, shutil
 
 # from chromadb.utils import embedding_functions
 
-from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
+from langchain_community.document_loaders import (
+    WebBaseLoader,
+    TextLoader,
+    PyPDFLoader,
+    CSVLoader,
+)
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain.chains import RetrievalQA
@@ -129,7 +134,7 @@ def store_doc(
 ):
     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
 
-    if file.content_type not in ["application/pdf", "text/plain"]:
+    if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
@@ -152,6 +157,8 @@ def store_doc(
             loader = PyPDFLoader(file_path)
         elif file.content_type == "text/plain":
             loader = TextLoader(file_path)
+        elif file.content_type == "text/csv":
+            loader = CSVLoader(file_path)
 
         data = loader.load()
         result = store_data_in_vector_db(data, collection_name)
diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte
index 9653f81e..5768be48 100644
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -122,9 +122,8 @@
 					const file = inputFiles[0];
 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 						reader.readAsDataURL(file);
-					} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
+					} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
 						console.log(file);
-						// const hash = (await calculateSHA256(file)).substring(0, 63);
 						const res = await uploadDocToVectorDB(localStorage.token, '', file);
 
 						if (res) {
@@ -241,9 +240,8 @@
 							const file = inputFiles[0];
 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 								reader.readAsDataURL(file);
-							} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
+							} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
 								console.log(file);
-								// const hash = (await calculateSHA256(file)).substring(0, 63);
 								const res = await uploadDocToVectorDB(localStorage.token, '', file);
 
 								if (res) {