forked from open-webui/open-webui
		
	feat: docx support
This commit is contained in:
		
							parent
							
								
									537a7f5f00
								
							
						
					
					
						commit
						57c050326c
					
				
					 3 changed files with 29 additions and 4 deletions
				
			
		|  | @@ -18,6 +18,7 @@ from langchain_community.document_loaders import ( | ||||||
|     TextLoader, |     TextLoader, | ||||||
|     PyPDFLoader, |     PyPDFLoader, | ||||||
|     CSVLoader, |     CSVLoader, | ||||||
|  |     Docx2txtLoader, | ||||||
| ) | ) | ||||||
| from langchain.text_splitter import RecursiveCharacterTextSplitter | from langchain.text_splitter import RecursiveCharacterTextSplitter | ||||||
| from langchain_community.vectorstores import Chroma | from langchain_community.vectorstores import Chroma | ||||||
|  | @@ -135,7 +136,12 @@ def store_doc( | ||||||
| ): | ): | ||||||
|     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" |     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" | ||||||
| 
 | 
 | ||||||
|     if file.content_type not in ["application/pdf", "text/plain", "text/csv"]: |     if file.content_type not in [ | ||||||
|  |         "application/pdf", | ||||||
|  |         "text/plain", | ||||||
|  |         "text/csv", | ||||||
|  |         "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | ||||||
|  |     ]: | ||||||
|         raise HTTPException( |         raise HTTPException( | ||||||
|             status_code=status.HTTP_400_BAD_REQUEST, |             status_code=status.HTTP_400_BAD_REQUEST, | ||||||
|             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, |             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, | ||||||
|  | @@ -156,6 +162,11 @@ def store_doc( | ||||||
| 
 | 
 | ||||||
|         if file.content_type == "application/pdf": |         if file.content_type == "application/pdf": | ||||||
|             loader = PyPDFLoader(file_path) |             loader = PyPDFLoader(file_path) | ||||||
|  |         elif ( | ||||||
|  |             file.content_type | ||||||
|  |             == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" | ||||||
|  |         ): | ||||||
|  |             loader = Docx2txtLoader(file_path) | ||||||
|         elif file.content_type == "text/plain": |         elif file.content_type == "text/plain": | ||||||
|             loader = TextLoader(file_path) |             loader = TextLoader(file_path) | ||||||
|         elif file.content_type == "text/csv": |         elif file.content_type == "text/csv": | ||||||
|  |  | ||||||
|  | @@ -16,12 +16,12 @@ aiohttp | ||||||
| peewee | peewee | ||||||
| bcrypt | bcrypt | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| langchain | langchain | ||||||
| langchain-community | langchain-community | ||||||
| chromadb | chromadb | ||||||
| sentence_transformers | sentence_transformers | ||||||
| pypdf | pypdf | ||||||
|  | docx2txt | ||||||
| 
 | 
 | ||||||
| PyJWT | PyJWT | ||||||
| pyjwt[crypto] | pyjwt[crypto] | ||||||
|  |  | ||||||
|  | @@ -143,7 +143,14 @@ | ||||||
| 					const file = inputFiles[0]; | 					const file = inputFiles[0]; | ||||||
| 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | ||||||
| 						reader.readAsDataURL(file); | 						reader.readAsDataURL(file); | ||||||
| 					} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { | 					} else if ( | ||||||
|  | 						[ | ||||||
|  | 							'application/pdf', | ||||||
|  | 							'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||||
|  | 							'text/plain', | ||||||
|  | 							'text/csv' | ||||||
|  | 						].includes(file['type']) | ||||||
|  | 					) { | ||||||
| 						uploadDoc(file); | 						uploadDoc(file); | ||||||
| 					} else { | 					} else { | ||||||
| 						toast.error(`Unsupported File Type '${file['type']}'.`); | 						toast.error(`Unsupported File Type '${file['type']}'.`); | ||||||
|  | @@ -249,7 +256,14 @@ | ||||||
| 							const file = inputFiles[0]; | 							const file = inputFiles[0]; | ||||||
| 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | 							if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | ||||||
| 								reader.readAsDataURL(file); | 								reader.readAsDataURL(file); | ||||||
| 							} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) { | 							} else if ( | ||||||
|  | 								[ | ||||||
|  | 									'application/pdf', | ||||||
|  | 									'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||||
|  | 									'text/plain', | ||||||
|  | 									'text/csv' | ||||||
|  | 								].includes(file['type']) | ||||||
|  | 							) { | ||||||
| 								uploadDoc(file); | 								uploadDoc(file); | ||||||
| 								filesInputElement.value = ''; | 								filesInputElement.value = ''; | ||||||
| 							} else { | 							} else { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy J. Baek
						Timothy J. Baek