Merge branch 'main' into main

This commit is contained in:
Timothy Jaeryang Baek 2024-01-25 00:13:12 -08:00 committed by GitHub
commit fa5918ad13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 54 additions and 26 deletions

View file

@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
PyPDFLoader,
CSVLoader,
Docx2txtLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
UnstructuredMarkdownLoader,
UnstructuredXMLLoader,
@@ -193,6 +194,8 @@ def store_doc(
loader = TextLoader(file_path)
elif file_ext in octet_markdown:
loader = UnstructuredMarkdownLoader(file_path)
elif file.content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
else:
loader = TextLoader(file_path)
known_type=False
@@ -215,10 +218,16 @@ def store_doc(
)
except Exception as e:
print(e)
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e),
)
if "No pandoc was found" in str(e):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
)
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e),
)
@app.get("/reset/db")

View file

@@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
USER_NOT_FOUND = "We could not find what you're looking for :/"
API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
MALICIOUS = "Unusual activities detected, please try again in a few minutes."
PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance."

View file

@@ -24,13 +24,13 @@ pypdf
docx2txt
unstructured
markdown
pypandoc
pandas
openpyxl
pyxlsb
xlrd
PyJWT
pyjwt[crypto]
black
pandas
openpyxl
pyxlsb
xlrd