fix: ocr issue

This commit is contained in:
Timothy J. Baek 2024-03-06 17:54:42 -08:00
parent dc6e91a23d
commit b88c64f80e
2 changed files with 2 additions and 1 deletion

View file

@@ -425,7 +425,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
] ]
if file_ext == "pdf": if file_ext == "pdf":
loader = PyPDFLoader(file_path) loader = PyPDFLoader(file_path, extract_images=True)
elif file_ext == "csv": elif file_ext == "csv":
loader = CSVLoader(file_path) loader = CSVLoader(file_path)
elif file_ext == "rst": elif file_ext == "rst":

View file

@@ -35,6 +35,7 @@ openpyxl
pyxlsb pyxlsb
xlrd xlrd
rapidocr-onnxruntime rapidocr-onnxruntime
opencv-python-headless
faster-whisper faster-whisper