forked from open-webui/open-webui
feat: rag csv support
This commit is contained in:
parent
d6a1bf1406
commit
d4b2578f6e
2 changed files with 11 additions and 6 deletions
|
@ -13,7 +13,12 @@ import os, shutil
|
||||||
|
|
||||||
# from chromadb.utils import embedding_functions
|
# from chromadb.utils import embedding_functions
|
||||||
|
|
||||||
from langchain_community.document_loaders import WebBaseLoader, TextLoader, PyPDFLoader
|
from langchain_community.document_loaders import (
|
||||||
|
WebBaseLoader,
|
||||||
|
TextLoader,
|
||||||
|
PyPDFLoader,
|
||||||
|
CSVLoader,
|
||||||
|
)
|
||||||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||||||
from langchain_community.vectorstores import Chroma
|
from langchain_community.vectorstores import Chroma
|
||||||
from langchain.chains import RetrievalQA
|
from langchain.chains import RetrievalQA
|
||||||
|
@ -129,7 +134,7 @@ def store_doc(
|
||||||
):
|
):
|
||||||
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
|
# "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"
|
||||||
|
|
||||||
if file.content_type not in ["application/pdf", "text/plain"]:
|
if file.content_type not in ["application/pdf", "text/plain", "text/csv"]:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_400_BAD_REQUEST,
|
status_code=status.HTTP_400_BAD_REQUEST,
|
||||||
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
|
||||||
|
@ -152,6 +157,8 @@ def store_doc(
|
||||||
loader = PyPDFLoader(file_path)
|
loader = PyPDFLoader(file_path)
|
||||||
elif file.content_type == "text/plain":
|
elif file.content_type == "text/plain":
|
||||||
loader = TextLoader(file_path)
|
loader = TextLoader(file_path)
|
||||||
|
elif file.content_type == "text/csv":
|
||||||
|
loader = CSVLoader(file_path)
|
||||||
|
|
||||||
data = loader.load()
|
data = loader.load()
|
||||||
result = store_data_in_vector_db(data, collection_name)
|
result = store_data_in_vector_db(data, collection_name)
|
||||||
|
|
|
@ -122,9 +122,8 @@
|
||||||
const file = inputFiles[0];
|
const file = inputFiles[0];
|
||||||
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
|
||||||
console.log(file);
|
console.log(file);
|
||||||
// const hash = (await calculateSHA256(file)).substring(0, 63);
|
|
||||||
const res = await uploadDocToVectorDB(localStorage.token, '', file);
|
const res = await uploadDocToVectorDB(localStorage.token, '', file);
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
|
@ -241,9 +240,8 @@
|
||||||
const file = inputFiles[0];
|
const file = inputFiles[0];
|
||||||
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
if (['image/gif', 'image/jpeg', 'image/png'].includes(file['type'])) {
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
} else if (['application/pdf', 'text/plain'].includes(file['type'])) {
|
} else if (['application/pdf', 'text/plain', 'text/csv'].includes(file['type'])) {
|
||||||
console.log(file);
|
console.log(file);
|
||||||
// const hash = (await calculateSHA256(file)).substring(0, 63);
|
|
||||||
const res = await uploadDocToVectorDB(localStorage.token, '', file);
|
const res = await uploadDocToVectorDB(localStorage.token, '', file);
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
|
|
Loading…
Reference in a new issue