forked from open-webui/open-webui
Add Excel document support
This commit is contained in:
parent
7eea3ef313
commit
8bfda730d9
2 changed files with 12 additions and 1 deletion
|
@@ -23,6 +23,7 @@ from langchain_community.document_loaders import (
|
||||||
UnstructuredMarkdownLoader,
|
UnstructuredMarkdownLoader,
|
||||||
UnstructuredXMLLoader,
|
UnstructuredXMLLoader,
|
||||||
UnstructuredRSTLoader,
|
UnstructuredRSTLoader,
|
||||||
|
UnstructuredExcelLoader,
|
||||||
)
|
)
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain_community.vectorstores import Chroma
|
from langchain_community.vectorstores import Chroma
|
||||||
|
@@ -157,6 +158,9 @@ def store_doc(
|
||||||
]
|
]
|
||||||
docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
known_doc_ext=["doc","docx"]
|
known_doc_ext=["doc","docx"]
|
||||||
|
excel_types=["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]
|
||||||
|
known_excel_ext=["xls", "xlsx"]
|
||||||
|
|
||||||
file_ext=file.filename.split(".")[-1].lower()
|
file_ext=file.filename.split(".")[-1].lower()
|
||||||
known_type=True
|
known_type=True
|
||||||
|
|
||||||
|
@@ -179,6 +183,8 @@ def store_doc(
|
||||||
loader = Docx2txtLoader(file_path)
|
loader = Docx2txtLoader(file_path)
|
||||||
elif file_ext=="csv":
|
elif file_ext=="csv":
|
||||||
loader = CSVLoader(file_path)
|
loader = CSVLoader(file_path)
|
||||||
|
elif (file.content_type in excel_types or file_ext in known_excel_ext):
|
||||||
|
loader = UnstructuredExcelLoader(file_path)
|
||||||
elif file_ext=="rst":
|
elif file_ext=="rst":
|
||||||
loader = UnstructuredRSTLoader(file_path, mode="elements")
|
loader = UnstructuredRSTLoader(file_path, mode="elements")
|
||||||
elif file_ext in text_xml:
|
elif file_ext in text_xml:
|
||||||
|
|
|
@@ -28,4 +28,9 @@ markdown
|
||||||
PyJWT
|
PyJWT
|
||||||
pyjwt[crypto]
|
pyjwt[crypto]
|
||||||
|
|
||||||
black
|
black
|
||||||
|
|
||||||
|
pandas
|
||||||
|
openpyxl
|
||||||
|
pyxlsb
|
||||||
|
xlrd
|
Loading…
Reference in a new issue