feat: added OCR functionality to the PDF loader

This commit is contained in:
Jannik Streidl 2024-03-05 22:25:25 +01:00
parent eb51ad14e4
commit 089a63e0c6
2 changed files with 2 additions and 1 deletion

View file

@@ -419,7 +419,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str):
]
if file_ext == "pdf":
-loader = PyPDFLoader(file_path)
+loader = PyPDFLoader(file_path, extract_images=True)
elif file_ext == "csv":
loader = CSVLoader(file_path)
elif file_ext == "rst":

View file

@@ -33,6 +33,7 @@ pandas
openpyxl
pyxlsb
xlrd
+rapidocr-onnxruntime
faster-whisper