forked from open-webui/open-webui

feat: hybrid search and reranking support

Author: Steven Kreitzer
Parent: db801aee79
Commit: c0259aad67

10 changed files with 262 additions and 131 deletions
File: CHANGELOG.md

@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added

 - **🛠️ Improved Embedding Model Support**: You can now use any embedding model `sentence_transformers` supports.
+- **🌟 Enhanced RAG Pipeline**: Added `BM25` hybrid searching with reranking model support using `sentence_transformers`.

 ## [0.1.120] - 2024-04-20

File: Dockerfile

@@ -10,7 +10,7 @@ ARG USE_CUDA_VER=cu121
 # for better performance and multilangauge support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB)
 # IMPORTANT: If you change the embedding model (sentence-transformers/all-MiniLM-L6-v2) and vice versa, you aren't able to use RAG Chat with your previous documents loaded in the WebUI! You need to re-embed them.
 ARG USE_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
-ARG USE_RERANKING_MODEL=BAAI/bge-reranker-base
+ARG USE_RERANKING_MODEL=""

 ######## WebUI frontend ########
 FROM --platform=$BUILDPLATFORM node:21-alpine3.19 as build
@@ -67,6 +67,9 @@ ENV WHISPER_MODEL="base" \
 ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \
     RAG_RERANKING_MODEL="$USE_RERANKING_MODEL_DOCKER" \
     SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models"
+
+## Hugging Face download cache ##
+ENV HF_HOME="/app/backend/data/cache/embedding/models"
 #### Other models ##########################################################

 WORKDIR /app/backend
@@ -102,13 +105,11 @@ RUN pip3 install uv && \
         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
         uv pip install --system -r requirements.txt --no-cache-dir && \
         python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
-        python -c "import os; from sentence_transformers import CrossEncoder; CrossEncoder(os.environ['RAG_RERANKING_MODEL'], device='cpu')" && \
         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
     else \
         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
         uv pip install --system -r requirements.txt --no-cache-dir && \
         python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.environ['RAG_EMBEDDING_MODEL'], device='cpu')" && \
-        python -c "import os; from sentence_transformers import CrossEncoder; CrossEncoder(os.environ['RAG_RERANKING_MODEL'], device='cpu')" && \
         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \
     fi

File: Ollama router (backend)

@@ -92,6 +92,10 @@ async def get_ollama_api_urls(user=Depends(get_admin_user)):
     return {"OLLAMA_BASE_URLS": app.state.OLLAMA_BASE_URLS}


+def get_ollama_endpoint(url_idx: int = 0):
+    return app.state.OLLAMA_BASE_URLS[url_idx]
+
+
 class UrlUpdateForm(BaseModel):
     urls: List[str]

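The new `get_ollama_endpoint` helper simply resolves a configured base URL by index. A usage sketch (the example URL is illustrative):

```python
base_url = get_ollama_endpoint()         # first configured URL, e.g. "http://localhost:11434"
second = get_ollama_endpoint(url_idx=1)  # raises IndexError if only one URL is configured
```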
File: RAG router (backend)

@@ -64,6 +64,8 @@ from config import (
     SRC_LOG_LEVELS,
     UPLOAD_DIR,
     DOCS_DIR,
+    RAG_TOP_K,
+    RAG_RELEVANCE_THRESHOLD,
     RAG_EMBEDDING_ENGINE,
     RAG_EMBEDDING_MODEL,
     RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
@@ -86,7 +88,8 @@ log.setLevel(SRC_LOG_LEVELS["RAG"])
 app = FastAPI()


-app.state.TOP_K = 4
+app.state.TOP_K = RAG_TOP_K
+app.state.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
 app.state.CHUNK_SIZE = CHUNK_SIZE
 app.state.CHUNK_OVERLAP = CHUNK_OVERLAP

@@ -107,12 +110,17 @@ if app.state.RAG_EMBEDDING_ENGINE == "":
         device=DEVICE_TYPE,
         trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
     )
+else:
+    app.state.sentence_transformer_ef = None

-app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
-    app.state.RAG_RERANKING_MODEL,
-    device=DEVICE_TYPE,
-    trust_remote_code=RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
-)
+if not app.state.RAG_RERANKING_MODEL == "":
+    app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
+        app.state.RAG_RERANKING_MODEL,
+        device=DEVICE_TYPE,
+        trust_remote_code=RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
+    )
+else:
+    app.state.sentence_transformer_rf = None


 origins = ["*"]
@@ -185,22 +193,22 @@ async def update_embedding_config(
     )
     try:
         app.state.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
+        app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model

         if app.state.RAG_EMBEDDING_ENGINE in ["ollama", "openai"]:
-            app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
-            app.state.sentence_transformer_ef = None
-
             if form_data.openai_config != None:
                 app.state.OPENAI_API_BASE_URL = form_data.openai_config.url
                 app.state.OPENAI_API_KEY = form_data.openai_config.key
+
+            app.state.sentence_transformer_ef = None
         else:
-            sentence_transformer_ef = sentence_transformers.SentenceTransformer(
-                app.state.RAG_EMBEDDING_MODEL,
-                device=DEVICE_TYPE,
-                trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
+            app.state.sentence_transformer_ef = (
+                sentence_transformers.SentenceTransformer(
+                    app.state.RAG_EMBEDDING_MODEL,
+                    device=DEVICE_TYPE,
+                    trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
+                )
             )
-            app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model
-            app.state.sentence_transformer_ef = sentence_transformer_ef

         return {
             "status": True,
@@ -222,7 +230,7 @@ async def update_embedding_config(

 class RerankingModelUpdateForm(BaseModel):
     reranking_model: str
-    
+

 @app.post("/reranking/update")
 async def update_reranking_config(
@@ -233,10 +241,14 @@ async def update_reranking_config(
     )
     try:
         app.state.RAG_RERANKING_MODEL = form_data.reranking_model
-        app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
-            app.state.RAG_RERANKING_MODEL,
-            device=DEVICE_TYPE,
-        )
+
+        if app.state.RAG_RERANKING_MODEL == "":
+            app.state.sentence_transformer_rf = None
+        else:
+            app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
+                app.state.RAG_RERANKING_MODEL,
+                device=DEVICE_TYPE,
+            )

         return {
             "status": True,
@@ -302,11 +314,13 @@ async def get_query_settings(user=Depends(get_admin_user)):
         "status": True,
         "template": app.state.RAG_TEMPLATE,
         "k": app.state.TOP_K,
+        "r": app.state.RELEVANCE_THRESHOLD,
     }


 class QuerySettingsForm(BaseModel):
     k: Optional[int] = None
+    r: Optional[float] = None
     template: Optional[str] = None


@@ -316,6 +330,7 @@ async def update_query_settings(
 ):
     app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE
     app.state.TOP_K = form_data.k if form_data.k else 4
+    app.state.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
     return {"status": True, "template": app.state.RAG_TEMPLATE}

@@ -323,6 +338,7 @@ class QueryDocForm(BaseModel):
     collection_name: str
     query: str
     k: Optional[int] = None
+    r: Optional[float] = None


 @app.post("/query/doc")
@@ -343,6 +359,7 @@ def query_doc_handler(
             collection_name=form_data.collection_name,
             query=form_data.query,
             k=form_data.k if form_data.k else app.state.TOP_K,
+            r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
             embeddings_function=embeddings_function,
             reranking_function=app.state.sentence_transformer_rf,
         )
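Per-request overrides for both knobs are optional; omitted values fall back to `app.state.TOP_K` and `app.state.RELEVANCE_THRESHOLD`. A hypothetical request (host, mount prefix, token, and collection name are placeholders):

```python
import requests

res = requests.post(
    "http://localhost:8080/rag/api/v1/query/doc",
    json={
        "collection_name": "<collection>",
        "query": "how is hybrid search wired up?",
        "k": 5,    # top-k passages
        "r": 0.2,  # relevance threshold
    },
    headers={"Authorization": "Bearer <token>"},
)
print(res.json()["documents"])
```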
@@ -358,6 +375,7 @@ class QueryCollectionsForm(BaseModel):
     collection_names: List[str]
     query: str
     k: Optional[int] = None
+    r: Optional[float] = None


 @app.post("/query/collection")
@@ -378,6 +396,7 @@ def query_collection_handler(
             collection_names=form_data.collection_names,
             query=form_data.query,
             k=form_data.k if form_data.k else app.state.TOP_K,
+            r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
             embeddings_function=embeddings_function,
             reranking_function=app.state.sentence_transformer_rf,
         )
@@ -467,12 +486,7 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b
         )

         embedding_texts = list(map(lambda x: x.replace("\n", " "), texts))
-        if app.state.RAG_EMBEDDING_ENGINE == "":
-            embeddings = embedding_func(embedding_texts)
-        else:
-            embeddings = [
-                embedding_func(embedding_texts) for text in texts
-            ]
+        embeddings = embedding_func(embedding_texts)

         for batch in create_batches(
             api=CHROMA_CLIENT,
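The deleted branch, which re-embedded the whole list once per text and was a bug in itself, is unnecessary because the function returned by `query_embeddings_function` now dispatches on input type: one embedding for a string, a list of embeddings for a list (see the utils hunk below). A toy stand-in for that contract:

```python
def embedding_func(texts):
    embed_one = lambda t: [float(len(t))]  # stand-in for the real embedding call
    if isinstance(texts, list):
        return [embed_one(t) for t in texts]
    return embed_one(texts)

print(embedding_func("hello"))       # [5.0]
print(embedding_func(["a", "bcd"]))  # [[1.0], [3.0]]
```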
File: RAG retrieval utilities (backend)

@@ -1,8 +1,5 @@
 import logging
 import requests
-import operator
-
-import sentence_transformers

 from typing import List

@@ -11,8 +8,10 @@ from apps.ollama.main import (
     GenerateEmbeddingsForm,
 )

+from langchain_core.documents import Document
+from langchain_community.retrievers import BM25Retriever
 from langchain.retrievers import (
-    BM25Retriever,
+    ContextualCompressionRetriever,
     EnsembleRetriever,
 )

@@ -27,6 +26,7 @@ def query_embeddings_doc(
     collection_name: str,
     query: str,
     k: int,
+    r: float,
     embeddings_function,
     reranking_function,
 ):
@@ -34,38 +34,39 @@ def query_embeddings_doc(
         # if you use docker use the model from the environment variable
         collection = CHROMA_CLIENT.get_collection(name=collection_name)

-        # keyword search
-        documents = collection.get() # get all documents
+        documents = collection.get()  # get all documents
         bm25_retriever = BM25Retriever.from_texts(
             texts=documents.get("documents"),
             metadatas=documents.get("metadatas"),
         )
         bm25_retriever.k = k

-        # semantic search (vector)
         chroma_retriever = ChromaRetriever(
             collection=collection,
-            k=k,
             embeddings_function=embeddings_function,
+            top_n=k,
         )

-        # hybrid search (ensemble)
         ensemble_retriever = EnsembleRetriever(
-            retrievers=[bm25_retriever, chroma_retriever],
-            weights=[0.6, 0.4]
+            retrievers=[bm25_retriever, chroma_retriever], weights=[0.5, 0.5]
         )

-        documents = ensemble_retriever.invoke(query)
-        result = query_results_rank(
-            query=query,
-            documents=documents,
-            k=k,
+        compressor = RerankCompressor(
+            embeddings_function=embeddings_function,
             reranking_function=reranking_function,
+            r_score=r,
+            top_n=k,
         )
+
+        compression_retriever = ContextualCompressionRetriever(
+            base_compressor=compressor, base_retriever=ensemble_retriever
+        )
+
+        result = compression_retriever.invoke(query)
         result = {
-            "distances": [[d[1].item() for d in result]],
-            "documents": [[d[0].page_content for d in result]],
-            "metadatas": [[d[0].metadata for d in result]],
+            "distances": [[d.metadata.get("score") for d in result]],
+            "documents": [[d.page_content for d in result]],
+            "metadatas": [[d.metadata for d in result]],
         }

         return result
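`query_embeddings_doc` now runs keyword (BM25) and vector (Chroma) retrieval side by side, fuses them with `EnsembleRetriever`, and pipes the fused list through a reranking compressor. `EnsembleRetriever` combines the two rankings with weighted Reciprocal Rank Fusion; here is a minimal sketch of that fusion idea, using the equal 0.5/0.5 weights from the hunk (the smoothing constant `c` and the document ids are illustrative):

```python
def weighted_rrf(rankings, weights, c=60):
    # Each list contributes weight / (c + rank) for every doc it ranks.
    scores = {}
    for ranking, weight in zip(rankings, weights):
        for rank, doc_id in enumerate(ranking, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + weight / (c + rank)
    return sorted(scores, key=scores.get, reverse=True)

bm25_hits = ["d1", "d3", "d2"]    # keyword ranking (stand-in ids)
vector_hits = ["d2", "d1", "d4"]  # semantic ranking (stand-in ids)
print(weighted_rrf([bm25_hits, vector_hits], [0.5, 0.5]))
# ['d1', 'd2', 'd3', 'd4']: d1 and d2 rank high in both lists
```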
@@ -73,58 +74,52 @@ def query_embeddings_doc(
         raise e


-def query_results_rank(query: str, documents, k: int, reranking_function):
-    scores = reranking_function.predict([(query, doc.page_content) for doc in documents])
-    docs_with_scores = list(zip(documents, scores))
-    result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
-    return result[: k]
-
-
 def merge_and_sort_query_results(query_results, k):
     # Initialize lists to store combined data
     combined_distances = []
     combined_documents = []
     combined_metadatas = []

-    # Combine data from each dictionary
     for data in query_results:
         combined_distances.extend(data["distances"][0])
         combined_documents.extend(data["documents"][0])
         combined_metadatas.extend(data["metadatas"][0])

     # Create a list of tuples (distance, document, metadata)
-    combined = list(
-        zip(combined_distances, combined_documents, combined_metadatas)
-    )
+    combined = list(zip(combined_distances, combined_documents, combined_metadatas))

     # Sort the list based on distances
     combined.sort(key=lambda x: x[0])

-    # Unzip the sorted list
-    sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)
+    # We don't have anything :-(
+    if not combined:
+        sorted_distances = []
+        sorted_documents = []
+        sorted_metadatas = []
+    else:
+        # Unzip the sorted list
+        sorted_distances, sorted_documents, sorted_metadatas = zip(*combined)

-    # Slicing the lists to include only k elements
-    sorted_distances = list(sorted_distances)[:k]
-    sorted_documents = list(sorted_documents)[:k]
-    sorted_metadatas = list(sorted_metadatas)[:k]
+        # Slicing the lists to include only k elements
+        sorted_distances = list(sorted_distances)[:k]
+        sorted_documents = list(sorted_documents)[:k]
+        sorted_metadatas = list(sorted_metadatas)[:k]

     # Create the output dictionary
-    merged_query_results = {
+    result = {
         "distances": [sorted_distances],
         "documents": [sorted_documents],
         "metadatas": [sorted_metadatas],
-        "embeddings": None,
-        "uris": None,
-        "data": None,
     }

-    return merged_query_results
+    return result


 def query_embeddings_collection(
     collection_names: List[str],
     query: str,
     k: int,
+    r: float,
     embeddings_function,
     reranking_function,
 ):
@@ -137,6 +132,7 @@ def query_embeddings_collection(
                 collection_name=collection_name,
                 query=query,
                 k=k,
+                r=r,
                 embeddings_function=embeddings_function,
                 reranking_function=reranking_function,
             )
@@ -162,22 +158,31 @@ def query_embeddings_function(
 ):
     if embedding_engine == "":
         return lambda query: embedding_function.encode(query).tolist()
-    elif embedding_engine == "ollama":
-        return lambda query: generate_ollama_embeddings(
-            GenerateEmbeddingsForm(
-                **{
-                    "model": embedding_model,
-                    "prompt": query,
-                }
-            )
-        )
-    elif embedding_engine == "openai":
-        return lambda query: generate_openai_embeddings(
-            model=embedding_model,
-            text=query,
-            key=openai_key,
-            url=openai_url,
-        )
+    elif embedding_engine in ["ollama", "openai"]:
+        if embedding_engine == "ollama":
+            func = lambda query: generate_ollama_embeddings(
+                GenerateEmbeddingsForm(
+                    **{
+                        "model": embedding_model,
+                        "prompt": query,
+                    }
+                )
+            )
+        elif embedding_engine == "openai":
+            func = lambda query: generate_openai_embeddings(
+                model=embedding_model,
+                text=query,
+                key=openai_key,
+                url=openai_url,
+            )
+
+        def generate_multiple(query, f):
+            if isinstance(query, list):
+                return [f(q) for q in query]
+            else:
+                return f(query)
+
+        return lambda query: generate_multiple(query, func)


 def rag_messages(
@@ -185,6 +190,7 @@ def rag_messages(
     messages,
     template,
     k,
+    r,
     embedding_engine,
     embedding_model,
     embedding_function,
@@ -221,53 +227,68 @@ def rag_messages(
         content_type = None
         query = ""

+    embeddings_function = query_embeddings_function(
+        embedding_engine,
+        embedding_model,
+        embedding_function,
+        openai_key,
+        openai_url,
+    )
+
+    extracted_collections = []
     relevant_contexts = []

     for doc in docs:
         context = None

-        try:
+        collection = doc.get("collection_name")
+        if collection:
+            collection = [collection]
+        else:
+            collection = doc.get("collection_names", [])

+        collection = set(collection).difference(extracted_collections)
+        if not collection:
+            log.debug(f"skipping {doc} as it has already been extracted")
+            continue
+
+        try:
             if doc["type"] == "text":
                 context = doc["content"]
-            else:
-                embeddings_function = query_embeddings_function(
-                    embedding_engine,
-                    embedding_model,
-                    embedding_function,
-                    openai_key,
-                    openai_url,
+            elif doc["type"] == "collection":
+                context = query_embeddings_collection(
+                    collection_names=doc["collection_names"],
+                    query=query,
+                    k=k,
+                    r=r,
+                    embeddings_function=embeddings_function,
+                    reranking_function=reranking_function,
+                )
+            else:
+                context = query_embeddings_doc(
+                    collection_name=doc["collection_name"],
+                    query=query,
+                    k=k,
+                    r=r,
+                    embeddings_function=embeddings_function,
+                    reranking_function=reranking_function,
                 )
-
-                if doc["type"] == "collection":
-                    context = query_embeddings_collection(
-                        collection_names=doc["collection_names"],
-                        query=query,
-                        k=k,
-                        embeddings_function=embeddings_function,
-                        reranking_function=reranking_function,
-                    )
-                else:
-                    context = query_embeddings_doc(
-                        collection_name=doc["collection_name"],
-                        query=query,
-                        k=k,
-                        embeddings_function=embeddings_function,
-                        reranking_function=reranking_function,
-                    )
-
         except Exception as e:
             log.exception(e)
             context = None

-        relevant_contexts.append(context)
+        if context:
+            relevant_contexts.append(context)
+
+        extracted_collections.extend(collection)

     log.debug(f"relevant_contexts: {relevant_contexts}")

     context_string = ""
     for context in relevant_contexts:
-        if context:
-            context_string += " ".join(context["documents"][0]) + "\n"
+        items = context["documents"][0]
+        context_string += "\n\n".join(items)
+    context_string = context_string.strip()

     ra_content = rag_template(
         template=template,
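`rag_messages` also gains collection de-duplication: `extracted_collections` remembers what has already been queried, so a collection referenced by several attached docs is fetched once. The dedup logic in isolation:

```python
extracted_collections = []
for doc in [{"collection_name": "a"}, {"collection_names": ["a", "b"]}]:
    collection = doc.get("collection_name")
    collection = [collection] if collection else doc.get("collection_names", [])
    collection = set(collection).difference(extracted_collections)
    if not collection:
        continue  # everything here was already queried
    print("querying", sorted(collection))
    extracted_collections.extend(collection)
# prints: querying ['a'], then querying ['b']
```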
@@ -275,6 +296,8 @@ def rag_messages(
         query=query,
     )

+    log.debug(f"ra_content: {ra_content}")
+
     if content_type == "list":
         new_content = []
         for content_item in user_message["content"]:
@@ -321,15 +344,14 @@ def generate_openai_embeddings(

 from typing import Any

-from langchain_core.callbacks import CallbackManagerForRetrieverRun
-from langchain_core.documents import Document
 from langchain_core.retrievers import BaseRetriever
+from langchain_core.callbacks import CallbackManagerForRetrieverRun


 class ChromaRetriever(BaseRetriever):
     collection: Any
-    k: int
     embeddings_function: Any
+    top_n: int

     def _get_relevant_documents(
         self,
@@ -341,7 +363,7 @@ class ChromaRetriever(BaseRetriever):

         results = self.collection.query(
             query_embeddings=[query_embeddings],
-            n_results=self.k,
+            n_results=self.top_n,
         )

         ids = results["ids"][0]
@@ -355,3 +377,60 @@
             )
             for idx in range(len(ids))
         ]
+
+
+import operator
+
+from typing import Optional, Sequence
+
+from langchain_core.documents import BaseDocumentCompressor, Document
+from langchain_core.callbacks import Callbacks
+from langchain_core.pydantic_v1 import Extra
+
+from sentence_transformers import util
+
+
+class RerankCompressor(BaseDocumentCompressor):
+    embeddings_function: Any
+    reranking_function: Any
+    r_score: float
+    top_n: int
+
+    class Config:
+        extra = Extra.forbid
+        arbitrary_types_allowed = True
+
+    def compress_documents(
+        self,
+        documents: Sequence[Document],
+        query: str,
+        callbacks: Optional[Callbacks] = None,
+    ) -> Sequence[Document]:
+        if self.reranking_function:
+            scores = self.reranking_function.predict(
+                [(query, doc.page_content) for doc in documents]
+            )
+        else:
+            query_embedding = self.embeddings_function(query)
+            document_embedding = self.embeddings_function(
+                [doc.page_content for doc in documents]
+            )
+            scores = util.cos_sim(query_embedding, document_embedding)[0]
+
+        docs_with_scores = list(zip(documents, scores.tolist()))
+        if self.r_score:
+            docs_with_scores = [
+                (d, s) for d, s in docs_with_scores if s >= self.r_score
+            ]
+
+        result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
+        final_results = []
+        for doc, doc_score in result[: self.top_n]:
+            metadata = doc.metadata
+            metadata["score"] = doc_score
+            doc = Document(
+                page_content=doc.page_content,
+                metadata=metadata,
+            )
+            final_results.append(doc)
+        return final_results
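`RerankCompressor` scores each (query, passage) pair with the configured cross-encoder, or falls back to cosine similarity between embeddings when no reranker is set. Note that `if self.r_score:` treats the default threshold of 0.0 as falsy, so nothing is filtered unless a positive threshold is configured. A standalone sketch of the cross-encoder path (the model name is a common public reranker, not this commit's default, which is empty):

```python
from sentence_transformers import CrossEncoder

reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2", device="cpu")
scores = reranker.predict([
    ("what is hybrid search", "Hybrid search fuses BM25 and vector results."),
    ("what is hybrid search", "Bananas are rich in potassium."),
])
print(scores)  # higher = more relevant; pairs below the `r` threshold are dropped
```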
File: config (backend)

@@ -420,6 +420,9 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "":
 CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
 # this uses the model defined in the Dockerfile ENV variable. If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (sentence-transformers/all-MiniLM-L6-v2)

+RAG_TOP_K = int(os.environ.get("RAG_TOP_K", "5"))
+RAG_RELEVANCE_THRESHOLD = float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0"))
+
 RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "")

 RAG_EMBEDDING_MODEL = os.environ.get(
@@ -431,10 +434,9 @@ RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE = (
     os.environ.get("RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE", "").lower() == "true"
 )

-RAG_RERANKING_MODEL = os.environ.get(
-    "RAG_RERANKING_MODEL", "BAAI/bge-reranker-v2-m3"
-)
-log.info(f"Reranking model set: {RAG_RERANKING_MODEL}"),
+RAG_RERANKING_MODEL = os.environ.get("RAG_RERANKING_MODEL", "")
+if not RAG_RERANKING_MODEL == "":
+    log.info(f"Reranking model set: {RAG_RERANKING_MODEL}"),

 RAG_RERANKING_MODEL_TRUST_REMOTE_CODE = (
     os.environ.get("RAG_RERANKING_MODEL_TRUST_REMOTE_CODE", "").lower() == "true"
@@ -448,16 +450,15 @@ if USE_CUDA.lower() == "true":
 else:
     DEVICE_TYPE = "cpu"

-
 CHROMA_CLIENT = chromadb.PersistentClient(
     path=CHROMA_DATA_PATH,
     settings=Settings(allow_reset=True, anonymized_telemetry=False),
 )
-CHUNK_SIZE = 1500
-CHUNK_OVERLAP = 100

+CHUNK_SIZE = int(os.environ.get("CHUNK_SIZE", "1500"))
+CHUNK_OVERLAP = int(os.environ.get("CHUNK_OVERLAP", "100"))

-RAG_TEMPLATE = """Use the following context as your learned knowledge, inside <context></context> XML tags.
+DEFAULT_RAG_TEMPLATE = """Use the following context as your learned knowledge, inside <context></context> XML tags.
 <context>
     [context]
 </context>
@@ -471,6 +472,8 @@ And answer according to the language of the user's question.
 Given the context information, answer the query.
 Query: [query]"""

+RAG_TEMPLATE = os.environ.get("RAG_TEMPLATE", DEFAULT_RAG_TEMPLATE)
+
 RAG_OPENAI_API_BASE_URL = os.getenv("RAG_OPENAI_API_BASE_URL", OPENAI_API_BASE_URL)
 RAG_OPENAI_API_KEY = os.getenv("RAG_OPENAI_API_KEY", OPENAI_API_KEY)

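All of the pipeline's tuning knobs are now environment-driven. A summary of the new variables and the fallbacks the config hunks read (a recap, not code from the commit):

```python
import os

os.environ.setdefault("RAG_TOP_K", "5")                  # passages returned per query
os.environ.setdefault("RAG_RELEVANCE_THRESHOLD", "0.0")  # 0.0 keeps everything
os.environ.setdefault("RAG_RERANKING_MODEL", "")         # empty string disables reranking
os.environ.setdefault("CHUNK_SIZE", "1500")
os.environ.setdefault("CHUNK_OVERLAP", "100")
# RAG_TEMPLATE can now also be overridden wholesale via the environment.
```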
File: app middleware (backend)

@@ -120,12 +120,13 @@ class RAGMiddleware(BaseHTTPMiddleware):
                     data["messages"],
                     rag_app.state.RAG_TEMPLATE,
                     rag_app.state.TOP_K,
+                    rag_app.state.RELEVANCE_THRESHOLD,
                     rag_app.state.RAG_EMBEDDING_ENGINE,
                     rag_app.state.RAG_EMBEDDING_MODEL,
                     rag_app.state.sentence_transformer_ef,
                     rag_app.state.sentence_transformer_rf,
-                    rag_app.state.RAG_OPENAI_API_KEY,
-                    rag_app.state.RAG_OPENAI_API_BASE_URL,
+                    rag_app.state.OPENAI_API_KEY,
+                    rag_app.state.OPENAI_API_BASE_URL,
                 )
                 del data["docs"]

File: RAG API client (frontend, TypeScript)

@@ -123,6 +123,7 @@ export const getQuerySettings = async (token: string) => {

 type QuerySettings = {
 	k: number | null;
+	r: number | null;
 	template: string | null;
 };

@@ -473,4 +474,4 @@ export const updateRerankingConfig = async (token: string, payload: RerankingMod
 	}

 	return res;
-};
+};
File: chat settings component (frontend, Svelte)

@@ -2,7 +2,7 @@
 	import fileSaver from 'file-saver';
 	const { saveAs } = fileSaver;

-	import { chats, user } from '$lib/stores';
+	import { config, chats, user } from '$lib/stores';

 	import {
 		createNewChat,
File: document settings component (frontend, Svelte)

@@ -42,6 +42,7 @@

 	let querySettings = {
 		template: '',
+		r: 0.0,
 		k: 4
 	};

@@ -124,7 +125,7 @@

 		updateRerankingModelLoading = true;
 		const res = await updateRerankingConfig(localStorage.token, {
-			reranking_model: rerankingModel,
+			reranking_model: rerankingModel
 		}).catch(async (error) => {
 			toast.error(error);
 			await setRerankingConfig();
@@ -450,6 +451,12 @@
 					</div>
 				</div>

+				<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500">
+					{$i18n.t(
+						'Note: If you choose a reranking model, it will use that to score and rerank instead of the embedding model.'
+					)}
+				</div>
+
 				<hr class=" dark:border-gray-700 my-3" />

 				<div class="  flex w-full justify-between">
@@ -576,6 +583,26 @@
 						</div>
 					</div>

+					<div class=" flex">
+						<div class="  flex w-full justify-between">
+							<div class="self-center text-xs font-medium flex-1">
+								{$i18n.t('Relevance Threshold')}
+							</div>
+
+							<div class="self-center p-3">
+								<input
+									class=" w-full rounded-lg py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
+									type="number"
+									step="0.01"
+									placeholder={$i18n.t('Enter Relevance Threshold')}
+									bind:value={querySettings.r}
+									autocomplete="off"
+									min="0.0"
+								/>
+							</div>
+						</div>
+					</div>
+
 					<div>
 						<div class=" mb-2.5 text-sm font-medium">{$i18n.t('RAG Template')}</div>
 						<textarea