forked from open-webui/open-webui
		
	
						commit
						1633ba4443
					
				
					 4 changed files with 23 additions and 1 deletion
				
			
		|  | @ -19,6 +19,8 @@ from langchain_community.document_loaders import ( | ||||||
|     PyPDFLoader, |     PyPDFLoader, | ||||||
|     CSVLoader, |     CSVLoader, | ||||||
|     Docx2txtLoader, |     Docx2txtLoader, | ||||||
|  |     UnstructuredWordDocumentLoader, | ||||||
|  |     UnstructuredMarkdownLoader, | ||||||
| ) | ) | ||||||
| from langchain.text_splitter import RecursiveCharacterTextSplitter | from langchain.text_splitter import RecursiveCharacterTextSplitter | ||||||
| from langchain_community.vectorstores import Chroma | from langchain_community.vectorstores import Chroma | ||||||
|  | @ -140,17 +142,27 @@ def store_doc( | ||||||
| ): | ): | ||||||
|     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" |     # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" | ||||||
| 
 | 
 | ||||||
|  |     print(file.content_type) | ||||||
|     if file.content_type not in [ |     if file.content_type not in [ | ||||||
|         "application/pdf", |         "application/pdf", | ||||||
|         "text/plain", |         "text/plain", | ||||||
|         "text/csv", |         "text/csv", | ||||||
|         "application/vnd.openxmlformats-officedocument.wordprocessingml.document", |         "application/vnd.openxmlformats-officedocument.wordprocessingml.document", | ||||||
|  |         "application/octet-stream", | ||||||
|     ]: |     ]: | ||||||
|         raise HTTPException( |         raise HTTPException( | ||||||
|             status_code=status.HTTP_400_BAD_REQUEST, |             status_code=status.HTTP_400_BAD_REQUEST, | ||||||
|             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, |             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     if file.content_type == "application/octet-stream" and file.filename.split(".")[ | ||||||
|  |         -1 | ||||||
|  |     ] not in ["md"]: | ||||||
|  |         raise HTTPException( | ||||||
|  |             status_code=status.HTTP_400_BAD_REQUEST, | ||||||
|  |             detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED, | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|     try: |     try: | ||||||
|         filename = file.filename |         filename = file.filename | ||||||
|         file_path = f"{UPLOAD_DIR}/{filename}" |         file_path = f"{UPLOAD_DIR}/{filename}" | ||||||
|  | @ -175,6 +187,9 @@ def store_doc( | ||||||
|             loader = TextLoader(file_path) |             loader = TextLoader(file_path) | ||||||
|         elif file.content_type == "text/csv": |         elif file.content_type == "text/csv": | ||||||
|             loader = CSVLoader(file_path) |             loader = CSVLoader(file_path) | ||||||
|  |         elif file.content_type == "application/octet-stream": | ||||||
|  |             if file.filename.split(".")[-1] == "md": | ||||||
|  |                 loader = UnstructuredMarkdownLoader(file_path) | ||||||
| 
 | 
 | ||||||
|         data = loader.load() |         data = loader.load() | ||||||
|         result = store_data_in_vector_db(data, collection_name) |         result = store_data_in_vector_db(data, collection_name) | ||||||
|  |  | ||||||
|  | @ -22,6 +22,8 @@ chromadb | ||||||
| sentence_transformers | sentence_transformers | ||||||
| pypdf | pypdf | ||||||
| docx2txt | docx2txt | ||||||
|  | unstructured | ||||||
|  | markdown | ||||||
| 
 | 
 | ||||||
| PyJWT | PyJWT | ||||||
| pyjwt[crypto] | pyjwt[crypto] | ||||||
|  |  | ||||||
|  | @ -149,9 +149,13 @@ | ||||||
| 
 | 
 | ||||||
| 				if (inputFiles && inputFiles.length > 0) { | 				if (inputFiles && inputFiles.length > 0) { | ||||||
| 					const file = inputFiles[0]; | 					const file = inputFiles[0]; | ||||||
|  | 					console.log(file, file.name.split('.').at(-1)); | ||||||
| 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | 					if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) { | ||||||
| 						reader.readAsDataURL(file); | 						reader.readAsDataURL(file); | ||||||
| 					} else if (SUPPORTED_FILE_TYPE.includes(file['type'])) { | 					} else if ( | ||||||
|  | 						SUPPORTED_FILE_TYPE.includes(file['type']) || | ||||||
|  | 						['md'].includes(file.name.split('.').at(-1)) | ||||||
|  | 					) { | ||||||
| 						uploadDoc(file); | 						uploadDoc(file); | ||||||
| 					} else { | 					} else { | ||||||
| 						toast.error(`Unsupported File Type '${file['type']}'.`); | 						toast.error(`Unsupported File Type '${file['type']}'.`); | ||||||
|  |  | ||||||
|  | @ -14,6 +14,7 @@ export const REQUIRED_OLLAMA_VERSION = '0.1.16'; | ||||||
| export const SUPPORTED_FILE_TYPE = [ | export const SUPPORTED_FILE_TYPE = [ | ||||||
| 	'application/pdf', | 	'application/pdf', | ||||||
| 	'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | 	'application/vnd.openxmlformats-officedocument.wordprocessingml.document', | ||||||
|  | 	'text/markdown', | ||||||
| 	'text/plain', | 	'text/plain', | ||||||
| 	'text/csv' | 	'text/csv' | ||||||
| ]; | ]; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Jaeryang Baek
						Timothy Jaeryang Baek