Merge branch 'main' into main

This commit is contained in:
Timothy Jaeryang Baek 2024-01-25 00:13:12 -08:00 committed by GitHub
commit fa5918ad13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 54 additions and 26 deletions

View file

@ -1,4 +1,7 @@
.github
.DS_Store .DS_Store
docs
kubernetes
node_modules node_modules
/.svelte-kit /.svelte-kit
/package /package

View file

@ -26,7 +26,21 @@ ENV OPENAI_API_KEY ""
ENV WEBUI_JWT_SECRET_KEY "SECRET_KEY" ENV WEBUI_JWT_SECRET_KEY "SECRET_KEY"
WORKDIR /app WORKDIR /app/backend
# install python dependencies
COPY ./backend/requirements.txt ./requirements.txt
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
# Install pandoc
# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
RUN apt-get update \
&& apt-get install -y pandoc \
&& rm -rf /var/lib/apt/lists/*
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
# copy embedding weight from build # copy embedding weight from build
RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
@ -38,15 +52,7 @@ RUN cd /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 &&\
# copy built frontend files # copy built frontend files
COPY --from=build /app/build /app/build COPY --from=build /app/build /app/build
WORKDIR /app/backend # copy backend files
COPY ./backend/requirements.txt ./requirements.txt
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
RUN pip3 install -r requirements.txt --no-cache-dir
# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
COPY ./backend . COPY ./backend .
CMD [ "sh", "start.sh"] CMD [ "sh", "start.sh"]

View file

@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
PyPDFLoader, PyPDFLoader,
CSVLoader, CSVLoader,
Docx2txtLoader, Docx2txtLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader, UnstructuredWordDocumentLoader,
UnstructuredMarkdownLoader, UnstructuredMarkdownLoader,
UnstructuredXMLLoader, UnstructuredXMLLoader,
@ -193,6 +194,8 @@ def store_doc(
loader = TextLoader(file_path) loader = TextLoader(file_path)
elif file_ext in octet_markdown: elif file_ext in octet_markdown:
loader = UnstructuredMarkdownLoader(file_path) loader = UnstructuredMarkdownLoader(file_path)
elif file.content_type == "application/epub+zip":
loader = UnstructuredEPubLoader(file_path)
else: else:
loader = TextLoader(file_path) loader = TextLoader(file_path)
known_type=False known_type=False
@ -215,6 +218,12 @@ def store_doc(
) )
except Exception as e: except Exception as e:
print(e) print(e)
if "No pandoc was found" in str(e):
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
)
else:
raise HTTPException( raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST, status_code=status.HTTP_400_BAD_REQUEST,
detail=ERROR_MESSAGES.DEFAULT(e), detail=ERROR_MESSAGES.DEFAULT(e),

View file

@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
USER_NOT_FOUND = "We could not find what you're looking for :/" USER_NOT_FOUND = "We could not find what you're looking for :/"
API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature." API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
MALICIOUS = "Unusual activities detected, please try again in a few minutes." MALICIOUS = "Unusual activities detected, please try again in a few minutes."
PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance."

View file

@ -24,13 +24,13 @@ pypdf
docx2txt docx2txt
unstructured unstructured
markdown markdown
pypandoc
pandas
openpyxl
pyxlsb
xlrd
PyJWT PyJWT
pyjwt[crypto] pyjwt[crypto]
black black
pandas
openpyxl
pyxlsb
xlrd

View file

@ -121,6 +121,7 @@
error: '' error: ''
}; };
try {
files = [...files, doc]; files = [...files, doc];
const res = await uploadDocToVectorDB(localStorage.token, '', file); const res = await uploadDocToVectorDB(localStorage.token, '', file);
@ -129,6 +130,11 @@
doc.collection_name = res.collection_name; doc.collection_name = res.collection_name;
files = files; files = files;
} }
} catch (e) {
// Remove the failed doc from the files array
files = files.filter((f) => f.name !== file.name);
toast.error(e);
}
}; };
onMount(() => { onMount(() => {

View file

@ -321,8 +321,9 @@
return true; return true;
} else { } else {
let title = chat.title.toLowerCase(); let title = chat.title.toLowerCase();
const query = search.toLowerCase();
if (title.includes(search)) { if (title.includes(query)) {
return true; return true;
} else { } else {
return false; return false;

View file

@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
export const REQUIRED_OLLAMA_VERSION = '0.1.16'; export const REQUIRED_OLLAMA_VERSION = '0.1.16';
export const SUPPORTED_FILE_TYPE = [ export const SUPPORTED_FILE_TYPE = [
'application/epub+zip',
'application/pdf', 'application/pdf',
'text/plain', 'text/plain',
'text/csv', 'text/csv',