forked from open-webui/open-webui
		
	feat: added OCR functionality to the PDF loader
This commit is contained in:
		
							parent
							
								
									eb51ad14e4
								
							
						
					
					
						commit
						089a63e0c6
					
				
					 2 changed files with 2 additions and 1 deletion
				
			
		|  | @ -419,7 +419,7 @@ def get_loader(filename: str, file_content_type: str, file_path: str): | |||
|     ] | ||||
| 
 | ||||
|     if file_ext == "pdf": | ||||
|         loader = PyPDFLoader(file_path) | ||||
|         loader = PyPDFLoader(file_path, extract_images=True) | ||||
|     elif file_ext == "csv": | ||||
|         loader = CSVLoader(file_path) | ||||
|     elif file_ext == "rst": | ||||
|  |  | |||
|  | @ -33,6 +33,7 @@ pandas | |||
| openpyxl | ||||
| pyxlsb | ||||
| xlrd | ||||
| rapidocr-onnxruntime | ||||
| 
 | ||||
| faster-whisper | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Jannik Streidl
						Jannik Streidl