From 35ace5778494782943b10890a0cb90382c716ce8 Mon Sep 17 00:00:00 2001 From: Marclass Date: Fri, 19 Jan 2024 10:48:04 -0700 Subject: [PATCH] add rst document for RAG --- backend/apps/rag/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index 65dde89a..44cb78f3 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -22,6 +22,7 @@ from langchain_community.document_loaders import ( UnstructuredWordDocumentLoader, UnstructuredMarkdownLoader, UnstructuredXMLLoader, + UnstructuredRSTLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.vectorstores import Chroma @@ -178,6 +179,8 @@ def store_doc( loader = Docx2txtLoader(file_path) elif file_ext=="csv": loader = CSVLoader(file_path) + elif file_ext=="rst": + loader = UnstructuredRSTLoader(file_path, mode="elements") elif file_ext in text_xml: loader=UnstructuredXMLLoader(file_path) elif file_ext in known_source_ext or file.content_type.find("text/")>=0: