forked from open-webui/open-webui
		
	Merge pull request #1050 from jannikstdl/rag-pdf-ocr
feat: added OCR functionality to the PDF loader
This commit is contained in:
		
						commit
						8fb5f54751
					
				
					 2 changed files with 2 additions and 1 deletion
				
			
		|  | @ -425,7 +425,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): | ||||||
|     ] |     ] | ||||||
| 
 | 
 | ||||||
|     if file_ext == "pdf": |     if file_ext == "pdf": | ||||||
|         loader = PyPDFLoader(file_path) |         loader = PyPDFLoader(file_path, extract_images=True) | ||||||
|     elif file_ext == "csv": |     elif file_ext == "csv": | ||||||
|         loader = CSVLoader(file_path) |         loader = CSVLoader(file_path) | ||||||
|     elif file_ext == "rst": |     elif file_ext == "rst": | ||||||
|  |  | ||||||
|  | @ -34,6 +34,7 @@ pandas | ||||||
| openpyxl | openpyxl | ||||||
| pyxlsb | pyxlsb | ||||||
| xlrd | xlrd | ||||||
|  | rapidocr-onnxruntime | ||||||
| 
 | 
 | ||||||
| faster-whisper | faster-whisper | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Jaeryang Baek
						Timothy Jaeryang Baek