forked from open-webui/open-webui
		
	feat: docx support
This commit is contained in:
		
parent 537a7f5f00
commit 57c050326c
					
				
					 3 changed files with 29 additions and 4 deletions
				
			
		|  | @ -18,6 +18,7 @@ from langchain_community.document_loaders import ( | |||
|     TextLoader, | ||||
|     PyPDFLoader, | ||||
|     CSVLoader, | ||||
|     Docx2txtLoader, | ||||
| ) | ||||
| from langchain.text_splitter import RecursiveCharacterTextSplitter | ||||
| from langchain_community.vectorstores import Chroma | ||||
|  | @ -135,7 +136,12 @@ def store_doc( | |||
| ): | ||||
|     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" | ||||
| 
 | ||||
|     if file.content_type not in ["application/pdf", "text/plain", "text/csv"]: | ||||
|     if file.content_type not in [ | ||||
|         "application/pdf", | ||||
|         "text/plain", | ||||
|         "text/csv", | ||||
|         "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | ||||
|     ]: | ||||
|         raise HTTPException( | ||||
|             status_code=status.HTTP_400_BAD_REQUEST, | ||||
|             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, | ||||
|  | @ -156,6 +162,11 @@ def store_doc( | |||
| 
 | ||||
|         if file.content_type == "application/pdf": | ||||
|             loader = PyPDFLoader(file_path) | ||||
|         elif ( | ||||
|             file.content_type | ||||
|             == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | ||||
|         ): | ||||
|             loader = Docx2txtLoader(file_path) | ||||
|         elif file.content_type == "text/plain": | ||||
|             loader = TextLoader(file_path) | ||||
|         elif file.content_type == "text/csv": | ||||
|  |  | |||
|  | @ -16,12 +16,12 @@ aiohttp | |||
| peewee | ||||
| bcrypt | ||||
| 
 | ||||
| 
 | ||||
| langchain | ||||
| langchain-community | ||||
| chromadb | ||||
| sentence_transformers | ||||
| pypdf | ||||
| docx2txt | ||||
| 
 | ||||
| PyJWT | ||||
| pyjwt[crypto] | ||||
|  |  | |||
|  | @ -143,7 +143,14 @@ | |||
| 					const file = inputFiles[0]; | ||||
| 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | ||||
| 						reader.readAsDataURL(file); | ||||
| 					} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { | ||||
| 					} else if ( | ||||
| 						[ | ||||
| 							'application/pdf', | ||||
| 							'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||
| 							'text/plain', | ||||
| 							'text/csv' | ||||
| 						].includes(file['type']) | ||||
| 					) { | ||||
| 						uploadDoc(file); | ||||
| 					} else { | ||||
| 						toast.error(`Unsupported File Type '${file['type']}'.`); | ||||
|  | @ -249,7 +256,14 @@ | |||
| 							const file = inputFiles[0]; | ||||
| 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | ||||
| 								reader.readAsDataURL(file); | ||||
| 							} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { | ||||
| 							} else if ( | ||||
| 								[ | ||||
| 									'application/pdf', | ||||
| 									'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||
| 									'text/plain', | ||||
| 									'text/csv' | ||||
| 								].includes(file['type']) | ||||
| 							) { | ||||
| 								uploadDoc(file); | ||||
| 								filesInputElement.value = ''; | ||||
| 							} else { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy J. Baek
						Timothy J. Baek