forked from open-webui/open-webui
Add Excel document support
This commit is contained in:
parent
7eea3ef313
commit
8bfda730d9
2 changed files with 12 additions and 1 deletion
|
@@ -23,6 +23,7 @@ from langchain_community.document_loaders import (
|
|||
UnstructuredMarkdownLoader,
|
||||
UnstructuredXMLLoader,
|
||||
UnstructuredRSTLoader,
|
||||
UnstructuredExcelLoader,
|
||||
)
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.vectorstores import Chroma
|
||||
|
@@ -157,6 +158,9 @@ def store_doc(
|
|||
]
|
||||
docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
known_doc_ext=["doc","docx"]
|
||||
excel_types=["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]
|
||||
known_excel_ext=["xls", "xlsx"]
|
||||
|
||||
file_ext=file.filename.split(".")[-1].lower()
|
||||
known_type=True
|
||||
|
||||
|
@@ -179,6 +183,8 @@ def store_doc(
|
|||
loader = Docx2txtLoader(file_path)
|
||||
elif file_ext=="csv":
|
||||
loader = CSVLoader(file_path)
|
||||
elif (file.content_type in excel_types or file_ext in known_excel_ext):
|
||||
loader = UnstructuredExcelLoader(file_path)
|
||||
elif file_ext=="rst":
|
||||
loader = UnstructuredRSTLoader(file_path, mode="elements")
|
||||
elif file_ext in text_xml:
|
||||
|
|
|
@@ -28,4 +28,9 @@ markdown
|
|||
PyJWT
|
||||
pyjwt[crypto]
|
||||
|
||||
black
|
||||
black
|
||||
|
||||
pandas
|
||||
openpyxl
|
||||
pyxlsb
|
||||
xlrd
|
Loading…
Reference in a new issue