forked from open-webui/open-webui
		
	Merge pull request #547 from Marclass/main
feat: Add reStructuredText specific parser for RAG
This commit is contained in:
		
						commit
						d517a3ebb4
					
				
					 4 changed files with 26 additions and 13 deletions
				
			
		|  | @ -22,6 +22,7 @@ from langchain_community.document_loaders import ( | |||
|     UnstructuredWordDocumentLoader, | ||||
|     UnstructuredMarkdownLoader, | ||||
|     UnstructuredXMLLoader, | ||||
|     UnstructuredRSTLoader, | ||||
| ) | ||||
| from langchain.text_splitter import RecursiveCharacterTextSplitter | ||||
| from langchain_community.vectorstores import Chroma | ||||
|  | @ -178,6 +179,8 @@ def store_doc( | |||
|             loader = Docx2txtLoader(file_path) | ||||
|         elif file_ext=="csv": | ||||
|             loader = CSVLoader(file_path) | ||||
|         elif file_ext=="rst": | ||||
|             loader = UnstructuredRSTLoader(file_path, mode="elements") | ||||
|         elif file_ext in text_xml: | ||||
|             loader=UnstructuredXMLLoader(file_path) | ||||
|         elif file_ext in known_source_ext or file.content_type.find("text/")>=0: | ||||
|  |  | |||
|  | @ -8,7 +8,7 @@ | |||
| 	import Suggestions from './MessageInput/Suggestions.svelte'; | ||||
| 	import { uploadDocToVectorDB } from '$lib/apis/rag'; | ||||
| 	import AddFilesPlaceholder from '../AddFilesPlaceholder.svelte'; | ||||
| 	import { SUPPORTED_FILE_TYPE } from '$lib/constants'; | ||||
| 	import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants'; | ||||
| 	import Documents from './MessageInput/Documents.svelte'; | ||||
| 	import Models from './MessageInput/Models.svelte'; | ||||
| 
 | ||||
|  | @ -169,11 +169,13 @@ | |||
| 						reader.readAsDataURL(file); | ||||
| 					} else if ( | ||||
| 						SUPPORTED_FILE_TYPE.includes(file['type']) || | ||||
| 						['md'].includes(file.name.split('.').at(-1)) | ||||
| 						SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) | ||||
| 					) { | ||||
| 						uploadDoc(file); | ||||
| 					} else { | ||||
| 						toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`); | ||||
| 						toast.error( | ||||
| 							`Unknown File Type '${file['type']}', but accepting and treating as plain text` | ||||
| 						); | ||||
| 						uploadDoc(file); | ||||
| 					} | ||||
| 				} else { | ||||
|  | @ -304,12 +306,14 @@ | |||
| 								reader.readAsDataURL(file); | ||||
| 							} else if ( | ||||
| 								SUPPORTED_FILE_TYPE.includes(file['type']) || | ||||
| 								['md'].includes(file.name.split('.').at(-1)) | ||||
| 								SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) | ||||
| 							) { | ||||
| 								uploadDoc(file); | ||||
| 								filesInputElement.value = ''; | ||||
| 							} else { | ||||
| 								toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`); | ||||
| 								toast.error( | ||||
| 									`Unknown File Type '${file['type']}', but accepting and treating as plain text` | ||||
| 								); | ||||
| 								uploadDoc(file); | ||||
| 								filesInputElement.value = ''; | ||||
| 							} | ||||
|  |  | |||
|  | @ -21,9 +21,11 @@ export const SUPPORTED_FILE_TYPE = [ | |||
| 	'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||
| 	'application/octet-stream', | ||||
| 	'application/x-javascript', | ||||
| 	'text/markdown', | ||||
| 	'text/markdown' | ||||
| ]; | ||||
| 
 | ||||
| export const SUPPORTED_FILE_EXTENSIONS = ['md', 'rst']; | ||||
| 
 | ||||
| // Source: https://kit.svelte.dev/docs/modules#$env-static-public
 | ||||
| // This feature, akin to $env/static/private, exclusively incorporates environment variables
 | ||||
| // that are prefixed with config.kit.env.publicPrefix (usually set to PUBLIC_).
 | ||||
|  |  | |||
|  | @ -7,7 +7,7 @@ | |||
| 	import { documents } from '$lib/stores'; | ||||
| 	import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents'; | ||||
| 
 | ||||
| 	import { SUPPORTED_FILE_TYPE } from '$lib/constants'; | ||||
| 	import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants'; | ||||
| 	import { uploadDocToVectorDB } from '$lib/apis/rag'; | ||||
| 	import { transformFileName } from '$lib/utils'; | ||||
| 
 | ||||
|  | @ -69,11 +69,13 @@ | |||
| 				const file = inputFiles[0]; | ||||
| 				if ( | ||||
| 					SUPPORTED_FILE_TYPE.includes(file['type']) || | ||||
| 					['md'].includes(file.name.split('.').at(-1)) | ||||
| 					SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) | ||||
| 				) { | ||||
| 					uploadDoc(file); | ||||
| 				} else { | ||||
| 					toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`); | ||||
| 					toast.error( | ||||
| 						`Unknown File Type '${file['type']}', but accepting and treating as plain text` | ||||
| 					); | ||||
| 					uploadDoc(file); | ||||
| 				} | ||||
| 			} else { | ||||
|  | @ -150,11 +152,13 @@ | |||
| 						const file = inputFiles[0]; | ||||
| 						if ( | ||||
| 							SUPPORTED_FILE_TYPE.includes(file['type']) || | ||||
| 							['md'].includes(file.name.split('.').at(-1)) | ||||
| 							SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1)) | ||||
| 						) { | ||||
| 							uploadDoc(file); | ||||
| 						} else { | ||||
| 							toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`); | ||||
| 							toast.error( | ||||
| 								`Unknown File Type '${file['type']}', but accepting and treating as plain text` | ||||
| 							); | ||||
| 							uploadDoc(file); | ||||
| 						} | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Jaeryang Baek
						Timothy Jaeryang Baek