forked from open-webui/open-webui
Merge pull request #466 from baumandm/feat/epub-support
feat: Add epub support
This commit is contained in:
commit
7054f02891
6 changed files with 37 additions and 11 deletions
|
@ -45,6 +45,13 @@ COPY ./backend/requirements.txt ./requirements.txt
|
|||
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
|
||||
RUN pip3 install -r requirements.txt --no-cache-dir
|
||||
|
||||
|
||||
# Install pandoc
|
||||
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y pandoc \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
|
||||
|
||||
COPY ./backend .
|
||||
|
|
|
@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
|
|||
PyPDFLoader,
|
||||
CSVLoader,
|
||||
Docx2txtLoader,
|
||||
UnstructuredEPubLoader,
|
||||
UnstructuredWordDocumentLoader,
|
||||
UnstructuredMarkdownLoader,
|
||||
UnstructuredXMLLoader,
|
||||
|
@ -187,6 +188,8 @@ def store_doc(
|
|||
loader = TextLoader(file_path)
|
||||
elif file_ext in octet_markdown:
|
||||
loader = UnstructuredMarkdownLoader(file_path)
|
||||
elif file.content_type == "application/epub+zip":
|
||||
loader = UnstructuredEPubLoader(file_path)
|
||||
else:
|
||||
loader = TextLoader(file_path)
|
||||
known_type=False
|
||||
|
@ -209,6 +212,12 @@ def store_doc(
|
|||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
if "No pandoc was found" in str(e):
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_400_BAD_REQUEST,
|
||||
detail=ERROR_MESSAGES.DEFAULT(e),
|
||||
|
|
|
@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
|
|||
USER_NOT_FOUND = "We could not find what you're looking for :/"
|
||||
API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
|
||||
MALICIOUS = "Unusual activities detected, please try again in a few minutes."
|
||||
|
||||
PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance."
|
||||
|
|
|
@ -24,6 +24,7 @@ pypdf
|
|||
docx2txt
|
||||
unstructured
|
||||
markdown
|
||||
pypandoc
|
||||
|
||||
PyJWT
|
||||
pyjwt[crypto]
|
||||
|
|
|
@ -121,6 +121,7 @@
|
|||
error: ''
|
||||
};
|
||||
|
||||
try {
|
||||
files = [...files, doc];
|
||||
const res = await uploadDocToVectorDB(localStorage.token, '', file);
|
||||
|
||||
|
@ -129,6 +130,11 @@
|
|||
doc.collection_name = res.collection_name;
|
||||
files = files;
|
||||
}
|
||||
} catch (e) {
|
||||
// Remove the failed doc from the files array
|
||||
files = files.filter((f) => f.name !== file.name);
|
||||
toast.error(e);
|
||||
}
|
||||
};
|
||||
|
||||
onMount(() => {
|
||||
|
|
|
@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
|
|||
export const REQUIRED_OLLAMA_VERSION = '0.1.16';
|
||||
|
||||
export const SUPPORTED_FILE_TYPE = [
|
||||
'application/epub+zip',
|
||||
'application/pdf',
|
||||
'text/plain',
|
||||
'text/csv',
|
||||
|
|
Loading…
Reference in a new issue