forked from open-webui/open-webui
		
	Add Excel document support
This commit is contained in:
		
							parent
							
								
									7eea3ef313
								
							
						
					
					
						commit
						8bfda730d9
					
				
					 2 changed files with 12 additions and 1 deletion
				
			
		|  | @ -23,6 +23,7 @@ from langchain_community.document_loaders import ( | ||||||
|     UnstructuredMarkdownLoader, |     UnstructuredMarkdownLoader, | ||||||
|     UnstructuredXMLLoader, |     UnstructuredXMLLoader, | ||||||
|     UnstructuredRSTLoader, |     UnstructuredRSTLoader, | ||||||
|  |     UnstructuredExcelLoader, | ||||||
| ) | ) | ||||||
| from langchain.text_splitter import RecursiveCharacterTextSplitter | from langchain.text_splitter import RecursiveCharacterTextSplitter | ||||||
| from langchain_community.vectorstores import Chroma | from langchain_community.vectorstores import Chroma | ||||||
|  | @ -157,6 +158,9 @@ def store_doc( | ||||||
|         ] |         ] | ||||||
|     docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document" |     docx_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document" | ||||||
|     known_doc_ext=["doc","docx"] |     known_doc_ext=["doc","docx"] | ||||||
|  |     excel_types=["application/vnd.ms-excel", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] | ||||||
|  |     known_excel_ext=["xls", "xlsx"] | ||||||
|  | 
 | ||||||
|     file_ext=file.filename.split(".")[-1].lower() |     file_ext=file.filename.split(".")[-1].lower() | ||||||
|     known_type=True |     known_type=True | ||||||
|      |      | ||||||
|  | @ -179,6 +183,8 @@ def store_doc( | ||||||
|             loader = Docx2txtLoader(file_path) |             loader = Docx2txtLoader(file_path) | ||||||
|         elif file_ext=="csv": |         elif file_ext=="csv": | ||||||
|             loader = CSVLoader(file_path) |             loader = CSVLoader(file_path) | ||||||
|  |         elif (file.content_type in excel_types or file_ext in known_excel_ext): | ||||||
|  |             loader = UnstructuredExcelLoader(file_path) | ||||||
|         elif file_ext=="rst": |         elif file_ext=="rst": | ||||||
|             loader = UnstructuredRSTLoader(file_path, mode="elements") |             loader = UnstructuredRSTLoader(file_path, mode="elements") | ||||||
|         elif file_ext in text_xml: |         elif file_ext in text_xml: | ||||||
|  |  | ||||||
|  | @ -29,3 +29,8 @@ PyJWT | ||||||
| pyjwt[crypto] | pyjwt[crypto] | ||||||
| 
 | 
 | ||||||
| black | black | ||||||
|  | 
 | ||||||
|  | pandas | ||||||
|  | openpyxl | ||||||
|  | pyxlsb | ||||||
|  | xlrd | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Marclass
						Marclass