Add reStructuredText (.rst) document support for RAG

This commit is contained in:
Marclass 2024-01-19 10:48:04 -07:00
parent e2edbedede
commit 35ace57784

View file

@@ -22,6 +22,7 @@ from langchain_community.document_loaders import (
UnstructuredWordDocumentLoader, UnstructuredWordDocumentLoader,
UnstructuredMarkdownLoader, UnstructuredMarkdownLoader,
UnstructuredXMLLoader, UnstructuredXMLLoader,
UnstructuredRSTLoader,
) )
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma from langchain_community.vectorstores import Chroma
@@ -178,6 +179,8 @@ def store_doc(
loader = Docx2txtLoader(file_path) loader = Docx2txtLoader(file_path)
elif file_ext=="csv": elif file_ext=="csv":
loader = CSVLoader(file_path) loader = CSVLoader(file_path)
elif file_ext=="rst":
loader = UnstructuredRSTLoader(file_path, mode="elements")
elif file_ext in text_xml: elif file_ext in text_xml:
loader=UnstructuredXMLLoader(file_path) loader=UnstructuredXMLLoader(file_path)
elif file_ext in known_source_ext or file.content_type.find("text/")>=0: elif file_ext in known_source_ext or file.content_type.find("text/")>=0: