forked from open-webui/open-webui
		
	Merge pull request #1050 from jannikstdl/rag-pdf-ocr
feat: added OCR functionality to the PDF loader
This commit is contained in:
		
						commit
						8fb5f54751
					
				
					 2 changed files with 2 additions and 1 deletion
				
			
		|  | @ -425,7 +425,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): | |||
|     ] | ||||
| 
 | ||||
|     if file_ext == "pdf": | ||||
|         loader = PyPDFLoader(file_path) | ||||
|         loader = PyPDFLoader(file_path, extract_images=True) | ||||
|     elif file_ext == "csv": | ||||
|         loader = CSVLoader(file_path) | ||||
|     elif file_ext == "rst": | ||||
|  |  | |||
|  | @ -34,6 +34,7 @@ pandas | |||
| openpyxl | ||||
| pyxlsb | ||||
| xlrd | ||||
| rapidocr-onnxruntime | ||||
| 
 | ||||
| faster-whisper | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Jaeryang Baek
						Timothy Jaeryang Baek