diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index f147152b..a3e3c113 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -28,6 +28,7 @@ from langchain_community.document_loaders import ( UnstructuredXMLLoader, UnstructuredRSTLoader, UnstructuredExcelLoader, + YoutubeLoader, ) from langchain.text_splitter import RecursiveCharacterTextSplitter @@ -181,7 +182,7 @@ class CollectionNameForm(BaseModel): collection_name: Optional[str] = "test" -class StoreWebForm(CollectionNameForm): +class UrlForm(CollectionNameForm): url: str @@ -456,8 +457,32 @@ def query_collection_handler( ) +@app.post("/youtube") +def store_youtube_video(form_data: UrlForm, user=Depends(get_current_user)): + try: + loader = YoutubeLoader.from_youtube_url(form_data.url, add_video_info=False) + data = loader.load() + + collection_name = form_data.collection_name + if collection_name == "": + collection_name = calculate_sha256_string(form_data.url)[:63] + + store_data_in_vector_db(data, collection_name, overwrite=True) + return { + "status": True, + "collection_name": collection_name, + "filename": form_data.url, + } + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), + ) + + @app.post("/web") -def store_web(form_data: StoreWebForm, user=Depends(get_current_user)): +def store_web(form_data: UrlForm, user=Depends(get_current_user)): # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm" try: loader = get_web_loader(form_data.url) diff --git a/backend/requirements.txt b/backend/requirements.txt index 79eddeae..ce01cf50 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -59,3 +59,4 @@ PyJWT[crypto]==2.8.0 black==24.4.2 langfuse==2.27.3 +youtube-transcript-api diff --git a/src/lib/apis/rag/index.ts b/src/lib/apis/rag/index.ts index 5dfa3d3a..a9d163f8 100644 --- a/src/lib/apis/rag/index.ts +++ b/src/lib/apis/rag/index.ts @@ -221,6 +221,37 @@ export const uploadWebToVectorDB = async (token: string, collection_name: string return res; }; +export const uploadYoutubeTranscriptionToVectorDB = async (token: string, url: string) => { + let error = null; + + const res = await fetch(`${RAG_API_BASE_URL}/youtube`, { + method: 'POST', + headers: { + Accept: 'application/json', + 'Content-Type': 'application/json', + authorization: `Bearer ${token}` + }, + body: JSON.stringify({ + url: url + }) + }) + .then(async (res) => { + if (!res.ok) throw await res.json(); + return res.json(); + }) + .catch((err) => { + error = err.detail; + console.log(err); + return null; + }); + + if (error) { + throw error; + } + + return res; +}; + export const queryDoc = async ( token: string, collection_name: string, diff --git a/src/lib/components/admin/AddUserModal.svelte b/src/lib/components/admin/AddUserModal.svelte new file mode 100644 index 00000000..e69de29b diff --git a/src/lib/components/chat/MessageInput.svelte b/src/lib/components/chat/MessageInput.svelte index 30380e94..2a9c579f 100644 --- a/src/lib/components/chat/MessageInput.svelte +++ b/src/lib/components/chat/MessageInput.svelte @@ -6,7 +6,11 @@ import Prompts from './MessageInput/PromptCommands.svelte'; import Suggestions from './MessageInput/Suggestions.svelte'; - import { uploadDocToVectorDB, uploadWebToVectorDB } from '$lib/apis/rag'; + import { + uploadDocToVectorDB, + uploadWebToVectorDB, + uploadYoutubeTranscriptionToVectorDB + } from '$lib/apis/rag'; import AddFilesPlaceholder from '../AddFilesPlaceholder.svelte'; import { SUPPORTED_FILE_TYPE, SUPPORTED_FILE_EXTENSIONS } from '$lib/constants'; import Documents from './MessageInput/Documents.svelte'; @@ -290,6 +294,34 @@ } }; + const uploadYoutubeTranscription = async (url) => { + console.log(url); + + const doc = { + type: 'doc', + name: url, + collection_name: '', + upload_status: false, + url: url, + error: '' + }; + + try { + files = [...files, doc]; + const res = await uploadYoutubeTranscriptionToVectorDB(localStorage.token, url); + + if (res) { + doc.upload_status = true; + doc.collection_name = res.collection_name; + files = files; + } + } catch (e) { + // Remove the failed doc from the files array + files = files.filter((f) => f.name !== url); + toast.error(e); + } + }; + onMount(() => { console.log(document.getElementById('sidebar')); window.setTimeout(() => chatTextAreaElement?.focus(), 0); @@ -428,6 +460,10 @@ { + console.log(e); + uploadYoutubeTranscription(e.detail); + }} on:url={(e) => { console.log(e); uploadWeb(e.detail); diff --git a/src/lib/components/chat/MessageInput/Documents.svelte b/src/lib/components/chat/MessageInput/Documents.svelte index 3d358acc..37fb672c 100644 --- a/src/lib/components/chat/MessageInput/Documents.svelte +++ b/src/lib/components/chat/MessageInput/Documents.svelte @@ -87,6 +87,17 @@ chatInputElement?.focus(); await tick(); }; + + const confirmSelectYoutube = async (url) => { + dispatch('youtube', url); + + prompt = removeFirstHashWord(prompt); + const chatInputElement = document.getElementById('chat-textarea'); + + await tick(); + chatInputElement?.focus(); + await tick(); + }; {#if filteredItems.length > 0 || prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} @@ -132,7 +143,30 @@ {/each} - {#if prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} + {#if prompt.split(' ')?.at(0)?.substring(1).startsWith('https://www.youtube.com')} + + {:else if prompt.split(' ')?.at(0)?.substring(1).startsWith('http')} + + + -
+
Overview

-
-
+
+
{$i18n.t('All Users')}
@@ -147,12 +150,32 @@ >
-
+
+ +
+ +