forked from open-webui/open-webui
Merge pull request #547 from Marclass/main
feat: Add reStructuredText specific parser for RAG
This commit is contained in:
commit
d517a3ebb4
4 changed files with 26 additions and 13 deletions
|
@ -22,6 +22,7 @@ from langchain_community.document_loaders import (
|
|||
UnstructuredWordDocumentLoader,
|
||||
UnstructuredMarkdownLoader,
|
||||
UnstructuredXMLLoader,
|
||||
UnstructuredRSTLoader,
|
||||
)
|
||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||
from langchain_community.vectorstores import Chroma
|
||||
|
@ -178,6 +179,8 @@ def store_doc(
|
|||
loader = Docx2txtLoader(file_path)
|
||||
elif file_ext=="csv":
|
||||
loader = CSVLoader(file_path)
|
||||
elif file_ext=="rst":
|
||||
loader = UnstructuredRSTLoader(file_path, mode="elements")
|
||||
elif file_ext in text_xml:
|
||||
loader=UnstructuredXMLLoader(file_path)
|
||||
elif file_ext in known_source_ext or file.content_type.find("text/")>=0:
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
import Suggestions from './MessageInput/Suggestions.svelte';
|
||||
import { uploadDocToVectorDB } from '$lib/apis/rag';
|
||||
import AddFilesPlaceholder from '../AddFilesPlaceholder.svelte';
|
||||
import { SUPPORTED_FILE_TYPE } from '$lib/constants';
|
||||
import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants';
|
||||
import Documents from './MessageInput/Documents.svelte';
|
||||
import Models from './MessageInput/Models.svelte';
|
||||
|
||||
|
@ -169,11 +169,13 @@
|
|||
reader.readAsDataURL(file);
|
||||
} else if (
|
||||
SUPPORTED_FILE_TYPE.includes(file['type']) ||
|
||||
['md'].includes(file.name.split('.').at(-1))
|
||||
SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1))
|
||||
) {
|
||||
uploadDoc(file);
|
||||
} else {
|
||||
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
|
||||
toast.error(
|
||||
`Unknown File Type '${file['type']}', but accepting and treating as plain text`
|
||||
);
|
||||
uploadDoc(file);
|
||||
}
|
||||
} else {
|
||||
|
@ -304,12 +306,14 @@
|
|||
reader.readAsDataURL(file);
|
||||
} else if (
|
||||
SUPPORTED_FILE_TYPE.includes(file['type']) ||
|
||||
['md'].includes(file.name.split('.').at(-1))
|
||||
SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1))
|
||||
) {
|
||||
uploadDoc(file);
|
||||
filesInputElement.value = '';
|
||||
} else {
|
||||
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
|
||||
toast.error(
|
||||
`Unknown File Type '${file['type']}', but accepting and treating as plain text`
|
||||
);
|
||||
uploadDoc(file);
|
||||
filesInputElement.value = '';
|
||||
}
|
||||
|
|
|
@ -21,9 +21,11 @@ export const SUPPORTED_FILE_TYPE = [
|
|||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/octet-stream',
|
||||
'application/x-javascript',
|
||||
'text/markdown',
|
||||
'text/markdown'
|
||||
];
|
||||
|
||||
export const SUPPORTED_FILE_EXTENSIONS = ['md', 'rst'];
|
||||
|
||||
// Source: https://kit.svelte.dev/docs/modules#$env-static-public
|
||||
// This feature, akin to $env/static/private, exclusively incorporates environment variables
|
||||
// that are prefixed with config.kit.env.publicPrefix (usually set to PUBLIC_).
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
import { documents } from '$lib/stores';
|
||||
import { createNewDoc, deleteDocByName, getDocs } from '$lib/apis/documents';
|
||||
|
||||
import { SUPPORTED_FILE_TYPE } from '$lib/constants';
|
||||
import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants';
|
||||
import { uploadDocToVectorDB } from '$lib/apis/rag';
|
||||
import { transformFileName } from '$lib/utils';
|
||||
|
||||
|
@ -69,11 +69,13 @@
|
|||
const file = inputFiles[0];
|
||||
if (
|
||||
SUPPORTED_FILE_TYPE.includes(file['type']) ||
|
||||
['md'].includes(file.name.split('.').at(-1))
|
||||
SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1))
|
||||
) {
|
||||
uploadDoc(file);
|
||||
} else {
|
||||
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
|
||||
toast.error(
|
||||
`Unknown File Type '${file['type']}', but accepting and treating as plain text`
|
||||
);
|
||||
uploadDoc(file);
|
||||
}
|
||||
} else {
|
||||
|
@ -150,11 +152,13 @@
|
|||
const file = inputFiles[0];
|
||||
if (
|
||||
SUPPORTED_FILE_TYPE.includes(file['type']) ||
|
||||
['md'].includes(file.name.split('.').at(-1))
|
||||
SUPPORTED_FILE_EXTENSIONS.includes(file.name.split('.').at(-1))
|
||||
) {
|
||||
uploadDoc(file);
|
||||
} else {
|
||||
toast.error(`Unknown File Type '${file['type']}', but accepting and treating as plain text`);
|
||||
toast.error(
|
||||
`Unknown File Type '${file['type']}', but accepting and treating as plain text`
|
||||
);
|
||||
uploadDoc(file);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue