forked from open-webui/open-webui
commit
b1d9d511a1
3 changed files with 29 additions and 4 deletions
|
@ -18,6 +18,7 @@ from langchain_community.document_loaders import (
|
||||||
TextLoader,
|
TextLoader,
|
||||||
PyPDFLoader,
|
PyPDFLoader,
|
||||||
CSVLoader,
|
CSVLoader,
|
||||||
|
Docx2txtLoader,
|
||||||
)
|
)
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain_community.vectorstores import Chroma
|
from langchain_community.vectorstores import Chroma
|
||||||
|
@ -135,7 +136,12 @@ def store_doc(
|
||||||
):
|
):
|
||||||
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
|
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
|
||||||
|
|
||||||
if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
|
if file.content_type not in [
|
||||||
|
"application/pdf",
|
||||||
|
"text/plain",
|
||||||
|
"text/csv",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
]:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
||||||
|
@ -156,6 +162,11 @@ def store_doc(
|
||||||
|
|
||||||
if file.content_type == "application/pdf":
|
if file.content_type == "application/pdf":
|
||||||
loader = PyPDFLoader(file_path)
|
loader = PyPDFLoader(file_path)
|
||||||
|
elif (
|
||||||
|
file.content_type
|
||||||
|
== "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
):
|
||||||
|
loader = Docx2txtLoader(file_path)
|
||||||
elif file.content_type == "text/plain":
|
elif file.content_type == "text/plain":
|
||||||
loader = TextLoader(file_path)
|
loader = TextLoader(file_path)
|
||||||
elif file.content_type == "text/csv":
|
elif file.content_type == "text/csv":
|
||||||
|
|
|
@ -16,12 +16,12 @@ aiohttp
|
||||||
peewee
|
peewee
|
||||||
bcrypt
|
bcrypt
|
||||||
|
|
||||||
|
|
||||||
langchain
|
langchain
|
||||||
langchain-community
|
langchain-community
|
||||||
chromadb
|
chromadb
|
||||||
sentence_transformers
|
sentence_transformers
|
||||||
pypdf
|
pypdf
|
||||||
|
docx2txt
|
||||||
|
|
||||||
PyJWT
|
PyJWT
|
||||||
pyjwt[crypto]
|
pyjwt[crypto]
|
||||||
|
|
|
@ -143,7 +143,14 @@
|
||||||
const file = inputFiles[0];
|
const file = inputFiles[0];
|
||||||
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
|
} else if (
|
||||||
|
[
|
||||||
|
'application/pdf',
|
||||||
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
|
'text/plain',
|
||||||
|
'text/csv'
|
||||||
|
].includes(file['type'])
|
||||||
|
) {
|
||||||
uploadDoc(file);
|
uploadDoc(file);
|
||||||
} else {
|
} else {
|
||||||
toast.error(`Unsupported File Type '${file['type']}'.`);
|
toast.error(`Unsupported File Type '${file['type']}'.`);
|
||||||
|
@ -249,7 +256,14 @@
|
||||||
const file = inputFiles[0];
|
const file = inputFiles[0];
|
||||||
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
|
} else if (
|
||||||
|
[
|
||||||
|
'application/pdf',
|
||||||
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||||
|
'text/plain',
|
||||||
|
'text/csv'
|
||||||
|
].includes(file['type'])
|
||||||
|
) {
|
||||||
uploadDoc(file);
|
uploadDoc(file);
|
||||||
filesInputElement.value = '';
|
filesInputElement.value = '';
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in a new issue