forked from open-webui/open-webui
		
	feat: Add epub support
This commit is contained in:
		
							parent
							
								
									f079cb6b56
								
							
						
					
					
						commit
						f559068186
					
				
					 6 changed files with 35 additions and 11 deletions
				
			
		|  | @ -28,6 +28,11 @@ ENV WEBUI_JWT_SECRET_KEY "SECRET_KEY" | |||
| 
 | ||||
| WORKDIR /app | ||||
| 
 | ||||
| # Install pandoc | ||||
| RUN apt-get update \ | ||||
|     && apt-get install -y pandoc \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
| 
 | ||||
| # copy embedding weight from build | ||||
| RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||
| COPY --from=build /app/onnx.tar.gz /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||
|  |  | |||
|  | @ -19,6 +19,7 @@ from langchain_community.document_loaders import ( | |||
|     PyPDFLoader, | ||||
|     CSVLoader, | ||||
|     Docx2txtLoader, | ||||
|     UnstructuredEPubLoader, | ||||
|     UnstructuredWordDocumentLoader, | ||||
|     UnstructuredMarkdownLoader, | ||||
|     UnstructuredXMLLoader, | ||||
|  | @ -184,6 +185,8 @@ def store_doc( | |||
|             loader = TextLoader(file_path) | ||||
|         elif file_ext in octet_markdown: | ||||
|             loader = UnstructuredMarkdownLoader(file_path) | ||||
|         elif file.content_type == "application/epub+zip": | ||||
|             loader = UnstructuredEPubLoader(file_path) | ||||
|         else: | ||||
|             loader = TextLoader(file_path) | ||||
|             known_type=False | ||||
|  | @ -206,10 +209,16 @@ def store_doc( | |||
|             ) | ||||
|     except Exception as e: | ||||
|         print(e) | ||||
|         raise HTTPException( | ||||
|             status_code=status.HTTP_400_BAD_REQUEST, | ||||
|             detail=ERROR_MESSAGES.DEFAULT(e), | ||||
|         ) | ||||
|         if "No pandoc was found" in str(e): | ||||
|             raise HTTPException( | ||||
|                 status_code=status.HTTP_400_BAD_REQUEST, | ||||
|                 detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, | ||||
|             ) | ||||
|         else: | ||||
|             raise HTTPException( | ||||
|                 status_code=status.HTTP_400_BAD_REQUEST, | ||||
|                 detail=ERROR_MESSAGES.DEFAULT(e), | ||||
|             ) | ||||
| 
 | ||||
| 
 | ||||
| @app.get("/reset/db") | ||||
|  |  | |||
|  | @ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum): | |||
|     USER_NOT_FOUND = "We could not find what you're looking for :/" | ||||
|     API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature." | ||||
|     MALICIOUS = "Unusual activities detected, please try again in a few minutes." | ||||
| 
 | ||||
|     PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance." | ||||
|  |  | |||
|  | @ -24,6 +24,7 @@ pypdf | |||
| docx2txt | ||||
| unstructured | ||||
| markdown | ||||
| pypandoc | ||||
| 
 | ||||
| PyJWT | ||||
| pyjwt[crypto] | ||||
|  |  | |||
|  | @ -121,13 +121,19 @@ | |||
| 			error: '' | ||||
| 		}; | ||||
| 
 | ||||
| 		files = [...files, doc]; | ||||
| 		const res = await uploadDocToVectorDB(localStorage.token, '', file); | ||||
| 		try { | ||||
| 			files = [...files, doc]; | ||||
| 			const res = await uploadDocToVectorDB(localStorage.token, '', file); | ||||
| 
 | ||||
| 		if (res) { | ||||
| 			doc.upload_status = true; | ||||
| 			doc.collection_name = res.collection_name; | ||||
| 			files = files; | ||||
| 			if (res) { | ||||
| 				doc.upload_status = true; | ||||
| 				doc.collection_name = res.collection_name; | ||||
| 				files = files; | ||||
| 			} | ||||
| 		} catch (e) { | ||||
| 			// Remove the failed doc from the files array | ||||
| 			files = files.filter((f) => f.name !== file.name); | ||||
| 			toast.error(e); | ||||
| 		} | ||||
| 	}; | ||||
| 
 | ||||
|  |  | |||
|  | @ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static'; | |||
| export const REQUIRED_OLLAMA_VERSION = '0.1.16'; | ||||
| 
 | ||||
| export const SUPPORTED_FILE_TYPE = [ | ||||
| 	'application/epub+zip', | ||||
| 	'application/pdf', | ||||
| 	'text/plain', | ||||
| 	'text/csv', | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dave Bauman
						Dave Bauman