Merge pull request #866 from asedmammad/fix-rag-scan-unsupported-mime

fix: RAG scan unsupported mime type
This commit is contained in:
Timothy Jaeryang Baek 2024-02-23 14:03:24 -05:00 committed by GitHub
commit a974d736d2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -423,7 +423,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
] or file_ext in ["xls", "xlsx"]: ] or file_ext in ["xls", "xlsx"]:
loader = UnstructuredExcelLoader(file_path) loader = UnstructuredExcelLoader(file_path)
elif file_ext in known_source_ext or file_content_type.find("text/") >= 0: elif file_ext in known_source_ext or (file_content_type and file_content_type.find("text/") >= 0):
loader = TextLoader(file_path) loader = TextLoader(file_path)
else: else:
loader = TextLoader(file_path) loader = TextLoader(file_path)
@ -486,8 +486,8 @@ def store_doc(
@app.get("/scan") @app.get("/scan")
def scan_docs_dir(user=Depends(get_admin_user)): def scan_docs_dir(user=Depends(get_admin_user)):
try:
for path in Path(DOCS_DIR).rglob("./**/*"): for path in Path(DOCS_DIR).rglob("./**/*"):
try:
if path.is_file() and not path.name.startswith("."): if path.is_file() and not path.name.startswith("."):
tags = extract_folders_after_data_docs(path) tags = extract_folders_after_data_docs(path)
filename = path.name filename = path.name