fix: encoding issue

This commit is contained in:
Timothy J. Baek 2024-03-25 23:50:52 -07:00
parent 6307adfba1
commit 3688955c77

View file

@@ -411,7 +411,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
     elif file_ext == "xml":
         loader = UnstructuredXMLLoader(file_path)
     elif file_ext in ["htm", "html"]:
-        loader = BSHTMLLoader(file_path)
+        loader = BSHTMLLoader(file_path, open_encoding="unicode_escape")
     elif file_ext == "md":
         loader = UnstructuredMarkdownLoader(file_path)
     elif file_content_type == "application/epub+zip":