From d4b2578f6e6b4676cb301d474446a02e78240aa7 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek" <timothyjrbeck@gmail.com>
Date: Sun, 7 Jan 2024 09:05:52 -0800
Subject: [PATCH] feat: rag csv support

---
 backend/apps/rag/main.py                    | 11 +++++++++--
 src/lib/components/chat/MessageInput.svelte |  6 ++----
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py
index b3844d69..1c84241e 100644
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -13,7 +13,12 @@ import os, shutil
 
 # from chromadb.utils import embedding_functions
 
-from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
+from langchain_community.document_loaders import (
+    WebBaseLoader,
+    TextLoader,
+    PyPDFLoader,
+    CSVLoader,
+)
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import Chroma
 from langchain.chains import RetrievalQA
@@ -129,7 +134,7 @@ def store_doc(
 ):
     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
 
-    if file.content_type not in ["application/pdf", "text/plain"]:
+    if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
@@ -152,6 +157,8 @@ def store_doc(
             loader = PyPDFLoader(file_path)
         elif file.content_type == "text/plain":
             loader = TextLoader(file_path)
+        elif file.content_type == "text/csv":
+            loader = CSVLoader(file_path)
 
         data = loader.load()
         result = store_data_in_vector_db(data, collection_name)
diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte
index 9653f81e..5768be48 100644
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -122,9 +122,8 @@
 					const file = inputFiles[0];
 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 						reader.readAsDataURL(file);
-					} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
+					} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
 						console.log(file);
-						// const hash = (await calculateSHA256(file)).substring(0, 63);
 						const res = await uploadDocToVectorDB(localStorage.token, '', file);
 
 						if (res) {
@@ -241,9 +240,8 @@
 							const file = inputFiles[0];
 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
 								reader.readAsDataURL(file);
-							} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
+							} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
 								console.log(file);
-								// const hash = (await calculateSHA256(file)).substring(0, 63);
 								const res = await uploadDocToVectorDB(localStorage.token, '', file);
 
 								if (res) {