forked from open-webui/open-webui
		
	feat: Add epub support
This commit is contained in:
		
							parent
							
								
									f079cb6b56
								
							
						
					
					
						commit
						f559068186
					
				
					 6 changed files with 35 additions and 11 deletions
				
			
		|  | @ -28,6 +28,11 @@ ENV WEBUI_JWT_SECRET_KEY "SECRET_KEY" | ||||||
| 
 | 
 | ||||||
| WORKDIR /app | WORKDIR /app | ||||||
| 
 | 
 | ||||||
|  | # Install pandoc | ||||||
|  | RUN apt-get update \ | ||||||
|  |     && apt-get install -y pandoc \ | ||||||
|  |     && rm -rf /var/lib/apt/lists/* | ||||||
|  | 
 | ||||||
| # copy embedding weight from build | # copy embedding weight from build | ||||||
| RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||||
| COPY --from=build /app/onnx.tar.gz /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | COPY --from=build /app/onnx.tar.gz /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||||
|  |  | ||||||
|  | @ -19,6 +19,7 @@ from langchain_community.document_loaders import ( | ||||||
|     PyPDFLoader, |     PyPDFLoader, | ||||||
|     CSVLoader, |     CSVLoader, | ||||||
|     Docx2txtLoader, |     Docx2txtLoader, | ||||||
|  |     UnstructuredEPubLoader, | ||||||
|     UnstructuredWordDocumentLoader, |     UnstructuredWordDocumentLoader, | ||||||
|     UnstructuredMarkdownLoader, |     UnstructuredMarkdownLoader, | ||||||
|     UnstructuredXMLLoader, |     UnstructuredXMLLoader, | ||||||
|  | @ -184,6 +185,8 @@ def store_doc( | ||||||
|             loader = TextLoader(file_path) |             loader = TextLoader(file_path) | ||||||
|         elif file_ext in octet_markdown: |         elif file_ext in octet_markdown: | ||||||
|             loader = UnstructuredMarkdownLoader(file_path) |             loader = UnstructuredMarkdownLoader(file_path) | ||||||
|  |         elif file.content_type == "application/epub+zip": | ||||||
|  |             loader = UnstructuredEPubLoader(file_path) | ||||||
|         else: |         else: | ||||||
|             loader = TextLoader(file_path) |             loader = TextLoader(file_path) | ||||||
|             known_type=False |             known_type=False | ||||||
|  | @ -206,10 +209,16 @@ def store_doc( | ||||||
|             ) |             ) | ||||||
|     except Exception as e: |     except Exception as e: | ||||||
|         print(e) |         print(e) | ||||||
|         raise HTTPException( |         if "No pandoc was found" in str(e): | ||||||
|             status_code=status.HTTP_400_BAD_REQUEST, |             raise HTTPException( | ||||||
|             detail=ERROR_MESSAGES.DEFAULT(e), |                 status_code=status.HTTP_400_BAD_REQUEST, | ||||||
|         ) |                 detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED, | ||||||
|  |             ) | ||||||
|  |         else: | ||||||
|  |             raise HTTPException( | ||||||
|  |                 status_code=status.HTTP_400_BAD_REQUEST, | ||||||
|  |                 detail=ERROR_MESSAGES.DEFAULT(e), | ||||||
|  |             ) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @app.get("/reset/db") | @app.get("/reset/db") | ||||||
|  |  | ||||||
|  | @ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum): | ||||||
|     USER_NOT_FOUND = "We could not find what you're looking for :/" |     USER_NOT_FOUND = "We could not find what you're looking for :/" | ||||||
|     API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature." |     API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature." | ||||||
|     MALICIOUS = "Unusual activities detected, please try again in a few minutes." |     MALICIOUS = "Unusual activities detected, please try again in a few minutes." | ||||||
|  | 
 | ||||||
|  |     PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance." | ||||||
|  |  | ||||||
|  | @ -24,8 +24,9 @@ pypdf | ||||||
| docx2txt | docx2txt | ||||||
| unstructured | unstructured | ||||||
| markdown | markdown | ||||||
|  | pypandoc | ||||||
| 
 | 
 | ||||||
| PyJWT | PyJWT | ||||||
| pyjwt[crypto] | pyjwt[crypto] | ||||||
| 
 | 
 | ||||||
| black | black | ||||||
|  |  | ||||||
|  | @ -121,13 +121,19 @@ | ||||||
| 			error: '' | 			error: '' | ||||||
| 		}; | 		}; | ||||||
| 
 | 
 | ||||||
| 		files = [...files, doc]; | 		try { | ||||||
| 		const res = await uploadDocToVectorDB(localStorage.token, '', file); | 			files = [...files, doc]; | ||||||
|  | 			const res = await uploadDocToVectorDB(localStorage.token, '', file); | ||||||
| 
 | 
 | ||||||
| 		if (res) { | 			if (res) { | ||||||
| 			doc.upload_status = true; | 				doc.upload_status = true; | ||||||
| 			doc.collection_name = res.collection_name; | 				doc.collection_name = res.collection_name; | ||||||
| 			files = files; | 				files = files; | ||||||
|  | 			} | ||||||
|  | 		} catch (e) { | ||||||
|  | 			// Remove the failed doc from the files array | ||||||
|  | 			files = files.filter((f) => f.name !== file.name); | ||||||
|  | 			toast.error(e); | ||||||
| 		} | 		} | ||||||
| 	}; | 	}; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static'; | ||||||
| export const REQUIRED_OLLAMA_VERSION = '0.1.16'; | export const REQUIRED_OLLAMA_VERSION = '0.1.16'; | ||||||
| 
 | 
 | ||||||
| export const SUPPORTED_FILE_TYPE = [ | export const SUPPORTED_FILE_TYPE = [ | ||||||
|  | 	'application/epub+zip', | ||||||
| 	'application/pdf', | 	'application/pdf', | ||||||
| 	'text/plain', | 	'text/plain', | ||||||
| 	'text/csv', | 	'text/csv', | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dave Bauman
						Dave Bauman