forked from open-webui/open-webui
Switch to the BeautifulSoup HTML loader (BSHTMLLoader) so that the document title is also captured
This commit is contained in:
parent
77f4ffddc1
commit
c91a5d8b1f
1 changed file with 2 additions and 2 deletions
|
@@ -21,7 +21,7 @@ from langchain_community.document_loaders import (
|
|||
TextLoader,
|
||||
PyPDFLoader,
|
||||
CSVLoader,
|
||||
- UnstructuredHTMLLoader,
|
||||
+ BSHTMLLoader,
|
||||
Docx2txtLoader,
|
||||
UnstructuredEPubLoader,
|
||||
UnstructuredWordDocumentLoader,
|
||||
|
@@ -404,7 +404,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
|
|||
elif file_ext == "xml":
|
||||
loader = UnstructuredXMLLoader(file_path)
|
||||
elif file_ext in ["htm", "html"]:
|
||||
- loader = UnstructuredHTMLLoader(file_path)
|
||||
+ loader = BSHTMLLoader(file_path)
|
||||
elif file_ext == "md":
|
||||
loader = UnstructuredMarkdownLoader(file_path)
|
||||
elif file_content_type == "application/epub+zip":
|
||||
|
|
Loading…
Reference in a new issue