forked from open-webui/open-webui
		
	Switch to the BeautifulSoup HTML loader (BSHTMLLoader) so that the document title is also captured
This commit is contained in:
		
							parent
							
								
									77f4ffddc1
								
							
						
					
					
						commit
						c91a5d8b1f
					
				
					 1 changed file with 2 additions and 2 deletions
				
			
		|  | @ -21,7 +21,7 @@ from langchain_community.document_loaders import ( | |||
|     TextLoader, | ||||
|     PyPDFLoader, | ||||
|     CSVLoader, | ||||
|     UnstructuredHTMLLoader, | ||||
|     BSHTMLLoader, | ||||
|     Docx2txtLoader, | ||||
|     UnstructuredEPubLoader, | ||||
|     UnstructuredWordDocumentLoader, | ||||
|  | @ -404,7 +404,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): | |||
|     elif file_ext == "xml": | ||||
|         loader = UnstructuredXMLLoader(file_path) | ||||
|     elif file_ext in ["htm", "html"]: | ||||
|         loader = UnstructuredHTMLLoader(file_path) | ||||
|         loader = BSHTMLLoader(file_path) | ||||
|     elif file_ext == "md": | ||||
|         loader = UnstructuredMarkdownLoader(file_path) | ||||
|     elif file_content_type == "application/epub+zip": | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue