feat: Add epub support

This commit is contained in:
Dave Bauman 2024-01-13 08:46:56 -05:00
parent f079cb6b56
commit f559068186
No known key found for this signature in database
GPG key ID: 3AF96C27EDA2C9D4
6 changed files with 35 additions and 11 deletions

View file

@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
PyPDFLoader,
CSVLoader,
Docx2txtLoader,
+UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
UnstructuredMarkdownLoader,
UnstructuredXMLLoader,
@@ -184,6 +185,8 @@ def store_doc(
loader = TextLoader(file_path)
elif file_ext in octet_markdown:
loader = UnstructuredMarkdownLoader(file_path)
+elif file.content_type == "application/epub+zip":
+loader = UnstructuredEPubLoader(file_path)
else:
loader = TextLoader(file_path)
known_type=False
@@ -206,10 +209,16 @@ def store_doc(
)
except Exception as e:
print(e)
-raise HTTPException(
-status_code=status.HTTP_400_BAD_REQUEST,
-detail=ERROR_MESSAGES.DEFAULT(e),
-)
+if "No pandoc was found" in str(e):
+raise HTTPException(
+status_code=status.HTTP_400_BAD_REQUEST,
+detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
+)
+else:
+raise HTTPException(
+status_code=status.HTTP_400_BAD_REQUEST,
+detail=ERROR_MESSAGES.DEFAULT(e),
+)
@app.get("/reset/db")