Merge branch 'main' into main

2024-01-25 00:13:12 -08:00 · 2024-01-25 00:13:12 -08:00 · fa5918ad13
commit fa5918ad13
parent 1e932d91cb 39986c4ec8
8 changed files with 54 additions and 26 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,7 @@
+.github
 .DS_Store
+docs
+kubernetes
 node_modules
 /.svelte-kit
 /package
--- a/Dockerfile
+++ b/Dockerfile
@@ -26,7 +26,21 @@ ENV OPENAI_API_KEY ""

 ENV WEBUI_JWT_SECRET_KEY "SECRET_KEY"

-WORKDIR /app
+WORKDIR /app/backend
+
+# install python dependencies
+COPY ./backend/requirements.txt ./requirements.txt
+
+RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
+RUN pip3 install -r requirements.txt --no-cache-dir
+
+# Install pandoc
+# RUN python -c "import pypandoc; pypandoc.download_pandoc()"
+RUN apt-get update \
+    && apt-get install -y pandoc \
+    && rm -rf /var/lib/apt/lists/*
+
+# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"

 # copy embedding weight from build
 RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
@@ -38,15 +52,7 @@ RUN cd /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 &&\
 # copy built frontend files
 COPY --from=build /app/build /app/build

-WORKDIR /app/backend
-
-COPY ./backend/requirements.txt ./requirements.txt
-
-RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir
-RUN pip3 install -r requirements.txt --no-cache-dir
-
-# RUN python -c "from sentence_transformers import SentenceTransformer; model = SentenceTransformer('all-MiniLM-L6-v2')"
-
+# copy backend files
 COPY ./backend .

 CMD [ "sh", "start.sh"]
--- a/backend/apps/rag/main.py
+++ b/backend/apps/rag/main.py
@@ -19,6 +19,7 @@ from langchain_community.document_loaders import (
    PyPDFLoader,
    CSVLoader,
    Docx2txtLoader,
+    UnstructuredEPubLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredMarkdownLoader,
    UnstructuredXMLLoader,
@@ -193,6 +194,8 @@ def store_doc(
            loader = TextLoader(file_path)
        elif file_ext in octet_markdown:
            loader = UnstructuredMarkdownLoader(file_path)
+        elif file.content_type == "application/epub+zip":
+            loader = UnstructuredEPubLoader(file_path)
        else:
            loader = TextLoader(file_path)
            known_type=False
@@ -215,6 +218,12 @@ def store_doc(
            )
    except Exception as e:
        print(e)
+        if "No pandoc was found" in str(e):
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
+            )
+        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT(e),
--- a/backend/constants.py
+++ b/backend/constants.py
@@ -42,3 +42,5 @@ class ERROR_MESSAGES(str, Enum):
    USER_NOT_FOUND = "We could not find what you're looking for :/"
    API_KEY_NOT_FOUND = "Oops! It looks like there's a hiccup. The API key is missing. Please make sure to provide a valid API key to access this feature."
    MALICIOUS = "Unusual activities detected, please try again in a few minutes."
+
+    PANDOC_NOT_INSTALLED = "Pandoc is not installed on the server. Please contact your administrator for assistance."
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -24,13 +24,13 @@ pypdf
 docx2txt
 unstructured
 markdown
+pypandoc
+pandas
+openpyxl
+pyxlsb
+xlrd

 PyJWT
 pyjwt[crypto]

 black
-
-pandas
-openpyxl
-pyxlsb
-xlrd
--- a/src/lib/components/chat/MessageInput.svelte
+++ b/src/lib/components/chat/MessageInput.svelte
@@ -121,6 +121,7 @@
 			error: ''
 		};

+		try {
 			files = [...files, doc];
 			const res = await uploadDocToVectorDB(localStorage.token, '', file);

@@ -129,6 +130,11 @@
 				doc.collection_name = res.collection_name;
 				files = files;
 			}
+		} catch (e) {
+			// Remove the failed doc from the files array
+			files = files.filter((f) => f.name !== file.name);
+			toast.error(e);
+		}
 	};

 	onMount(() => {
--- a/src/lib/components/layout/Sidebar.svelte
+++ b/src/lib/components/layout/Sidebar.svelte
@@ -321,8 +321,9 @@
 						return true;
 					} else {
 						let title = chat.title.toLowerCase();
+						const query = search.toLowerCase();

-						if (title.includes(search)) {
+						if (title.includes(query)) {
 							return true;
 						} else {
 							return false;
--- a/src/lib/constants.ts
+++ b/src/lib/constants.ts
@@ -12,6 +12,7 @@ export const WEB_UI_VERSION = 'v1.0.0-alpha-static';
 export const REQUIRED_OLLAMA_VERSION = '0.1.16';

 export const SUPPORTED_FILE_TYPE = [
+	'application/epub+zip',
 	'application/pdf',
 	'text/plain',
 	'text/csv',