forked from open-webui/open-webui
		
	Include HTML LangChain loader for RAG
This commit is contained in:
		
							parent
							
								
									eb004ccfc2
								
							
						
					
					
						commit
						784a6ec85e
					
				
					 1 changed file with 3 additions and 0 deletions
				
			
		|  | @ -21,6 +21,7 @@ from langchain_community.document_loaders import ( | ||||||
|     TextLoader, |     TextLoader, | ||||||
|     PyPDFLoader, |     PyPDFLoader, | ||||||
|     CSVLoader, |     CSVLoader, | ||||||
|  |     UnstructuredHTMLLoader, | ||||||
|     Docx2txtLoader, |     Docx2txtLoader, | ||||||
|     UnstructuredEPubLoader, |     UnstructuredEPubLoader, | ||||||
|     UnstructuredWordDocumentLoader, |     UnstructuredWordDocumentLoader, | ||||||
|  | @ -402,6 +403,8 @@ def get_loader(filename: str, file_content_type: str, file_path: str): | ||||||
|         loader = UnstructuredRSTLoader(file_path, mode="elements") |         loader = UnstructuredRSTLoader(file_path, mode="elements") | ||||||
|     elif file_ext == "xml": |     elif file_ext == "xml": | ||||||
|         loader = UnstructuredXMLLoader(file_path) |         loader = UnstructuredXMLLoader(file_path) | ||||||
|  |     elif file_ext in ["htm", "html"]: | ||||||
|  |         loader = UnstructuredHTMLLoader(file_path) | ||||||
|     elif file_ext == "md": |     elif file_ext == "md": | ||||||
|         loader = UnstructuredMarkdownLoader(file_path) |         loader = UnstructuredMarkdownLoader(file_path) | ||||||
|     elif file_content_type == "application/epub+zip": |     elif file_content_type == "application/epub+zip": | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Doug Danat
						Doug Danat