Merge pull request #1756 from buroa/buroa/toggle-hybrid

feat: toggle hybrid search
This commit is contained in:
Timothy Jaeryang Baek 2024-04-26 11:48:44 -07:00 committed by GitHub
commit 543707eefd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 156 additions and 103 deletions

View file

@ -70,6 +70,7 @@ from config import (
RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL,
RAG_EMBEDDING_MODEL_AUTO_UPDATE, RAG_EMBEDDING_MODEL_AUTO_UPDATE,
RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE, RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
ENABLE_RAG_HYBRID_SEARCH,
RAG_RERANKING_MODEL, RAG_RERANKING_MODEL,
RAG_RERANKING_MODEL_AUTO_UPDATE, RAG_RERANKING_MODEL_AUTO_UPDATE,
RAG_RERANKING_MODEL_TRUST_REMOTE_CODE, RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
@ -91,6 +92,9 @@ app = FastAPI()
app.state.TOP_K = RAG_TOP_K app.state.TOP_K = RAG_TOP_K
app.state.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD app.state.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD
app.state.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
app.state.CHUNK_SIZE = CHUNK_SIZE app.state.CHUNK_SIZE = CHUNK_SIZE
app.state.CHUNK_OVERLAP = CHUNK_OVERLAP app.state.CHUNK_OVERLAP = CHUNK_OVERLAP
@ -321,6 +325,7 @@ async def get_query_settings(user=Depends(get_admin_user)):
"template": app.state.RAG_TEMPLATE, "template": app.state.RAG_TEMPLATE,
"k": app.state.TOP_K, "k": app.state.TOP_K,
"r": app.state.RELEVANCE_THRESHOLD, "r": app.state.RELEVANCE_THRESHOLD,
"hybrid": app.state.ENABLE_RAG_HYBRID_SEARCH,
} }
@ -328,6 +333,7 @@ class QuerySettingsForm(BaseModel):
k: Optional[int] = None k: Optional[int] = None
r: Optional[float] = None r: Optional[float] = None
template: Optional[str] = None template: Optional[str] = None
hybrid: Optional[bool] = None
@app.post("/query/settings/update") @app.post("/query/settings/update")
@ -337,7 +343,14 @@ async def update_query_settings(
app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE app.state.RAG_TEMPLATE = form_data.template if form_data.template else RAG_TEMPLATE
app.state.TOP_K = form_data.k if form_data.k else 4 app.state.TOP_K = form_data.k if form_data.k else 4
app.state.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0 app.state.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
return {"status": True, "template": app.state.RAG_TEMPLATE} app.state.ENABLE_RAG_HYBRID_SEARCH = form_data.hybrid if form_data.hybrid else False
return {
"status": True,
"template": app.state.RAG_TEMPLATE,
"k": app.state.TOP_K,
"r": app.state.RELEVANCE_THRESHOLD,
"hybrid": app.state.ENABLE_RAG_HYBRID_SEARCH,
}
class QueryDocForm(BaseModel): class QueryDocForm(BaseModel):
@ -345,6 +358,7 @@ class QueryDocForm(BaseModel):
query: str query: str
k: Optional[int] = None k: Optional[int] = None
r: Optional[float] = None r: Optional[float] = None
hybrid: Optional[bool] = None
@app.post("/query/doc") @app.post("/query/doc")
@ -368,6 +382,11 @@ def query_doc_handler(
r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD, r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
embeddings_function=embeddings_function, embeddings_function=embeddings_function,
reranking_function=app.state.sentence_transformer_rf, reranking_function=app.state.sentence_transformer_rf,
hybrid_search=(
form_data.hybrid
if form_data.hybrid
else app.state.ENABLE_RAG_HYBRID_SEARCH
),
) )
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
@ -382,6 +401,7 @@ class QueryCollectionsForm(BaseModel):
query: str query: str
k: Optional[int] = None k: Optional[int] = None
r: Optional[float] = None r: Optional[float] = None
hybrid: Optional[bool] = None
@app.post("/query/collection") @app.post("/query/collection")
@ -405,6 +425,11 @@ def query_collection_handler(
r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD, r=form_data.r if form_data.r else app.state.RELEVANCE_THRESHOLD,
embeddings_function=embeddings_function, embeddings_function=embeddings_function,
reranking_function=app.state.sentence_transformer_rf, reranking_function=app.state.sentence_transformer_rf,
hybrid_search=(
form_data.hybrid
if form_data.hybrid
else app.state.ENABLE_RAG_HYBRID_SEARCH
),
) )
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)

View file

@ -18,8 +18,6 @@ from langchain.retrievers import (
EnsembleRetriever, EnsembleRetriever,
) )
from sentence_transformers import CrossEncoder
from typing import Optional from typing import Optional
from config import SRC_LOG_LEVELS, CHROMA_CLIENT from config import SRC_LOG_LEVELS, CHROMA_CLIENT
@ -32,16 +30,15 @@ def query_embeddings_doc(
collection_name: str, collection_name: str,
query: str, query: str,
embeddings_function, embeddings_function,
reranking_function,
k: int, k: int,
reranking_function: Optional[CrossEncoder] = None, r: int,
r: Optional[float] = None, hybrid_search: bool,
): ):
try: try:
collection = CHROMA_CLIENT.get_collection(name=collection_name)
if reranking_function: if hybrid_search:
# if you use docker use the model from the environment variable
collection = CHROMA_CLIENT.get_collection(name=collection_name)
documents = collection.get() # get all documents documents = collection.get() # get all documents
bm25_retriever = BM25Retriever.from_texts( bm25_retriever = BM25Retriever.from_texts(
texts=documents.get("documents"), texts=documents.get("documents"),
@ -77,24 +74,19 @@ def query_embeddings_doc(
"metadatas": [[d.metadata for d in result]], "metadatas": [[d.metadata for d in result]],
} }
else: else:
# if you use docker use the model from the environment variable
query_embeddings = embeddings_function(query) query_embeddings = embeddings_function(query)
log.info(f"query_embeddings_doc {query_embeddings}")
collection = CHROMA_CLIENT.get_collection(name=collection_name)
result = collection.query( result = collection.query(
query_embeddings=[query_embeddings], query_embeddings=[query_embeddings],
n_results=k, n_results=k,
) )
log.info(f"query_embeddings_doc:result {result}") log.info(f"query_embeddings_doc:result {result}")
return result return result
except Exception as e: except Exception as e:
raise e raise e
def merge_and_sort_query_results(query_results, k): def merge_and_sort_query_results(query_results, k, reverse=False):
# Initialize lists to store combined data # Initialize lists to store combined data
combined_distances = [] combined_distances = []
combined_documents = [] combined_documents = []
@ -109,7 +101,7 @@ def merge_and_sort_query_results(query_results, k):
combined = list(zip(combined_distances, combined_documents, combined_metadatas)) combined = list(zip(combined_distances, combined_documents, combined_metadatas))
# Sort the list based on distances # Sort the list based on distances
combined.sort(key=lambda x: x[0]) combined.sort(key=lambda x: x[0], reverse=reverse)
# We don't have anything :-( # We don't have anything :-(
if not combined: if not combined:
@ -142,6 +134,7 @@ def query_embeddings_collection(
r: float, r: float,
embeddings_function, embeddings_function,
reranking_function, reranking_function,
hybrid_search: bool,
): ):
results = [] results = []
@ -155,12 +148,14 @@ def query_embeddings_collection(
r=r, r=r,
embeddings_function=embeddings_function, embeddings_function=embeddings_function,
reranking_function=reranking_function, reranking_function=reranking_function,
hybrid_search=hybrid_search,
) )
results.append(result) results.append(result)
except: except:
pass pass
return merge_and_sort_query_results(results, k) reverse = hybrid and reranking_function is not None
return merge_and_sort_query_results(results, k=k, reverse=reverse)
def rag_template(template: str, context: str, query: str): def rag_template(template: str, context: str, query: str):
@ -211,6 +206,7 @@ def rag_messages(
template, template,
k, k,
r, r,
hybrid_search,
embedding_engine, embedding_engine,
embedding_model, embedding_model,
embedding_function, embedding_function,
@ -283,6 +279,7 @@ def rag_messages(
r=r, r=r,
embeddings_function=embeddings_function, embeddings_function=embeddings_function,
reranking_function=reranking_function, reranking_function=reranking_function,
hybrid_search=hybrid_search,
) )
else: else:
context = query_embeddings_doc( context = query_embeddings_doc(
@ -292,6 +289,7 @@ def rag_messages(
r=r, r=r,
embeddings_function=embeddings_function, embeddings_function=embeddings_function,
reranking_function=reranking_function, reranking_function=reranking_function,
hybrid_search=hybrid_search,
) )
except Exception as e: except Exception as e:
log.exception(e) log.exception(e)
@ -479,7 +477,9 @@ class RerankCompressor(BaseDocumentCompressor):
(d, s) for d, s in docs_with_scores if s >= self.r_score (d, s) for d, s in docs_with_scores if s >= self.r_score
] ]
result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True) reverse = self.reranking_function is not None
result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=reverse)
final_results = [] final_results = []
for doc, doc_score in result[: self.top_n]: for doc, doc_score in result[: self.top_n]:
metadata = doc.metadata metadata = doc.metadata

View file

@ -423,6 +423,10 @@ CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db"
RAG_TOP_K = int(os.environ.get("RAG_TOP_K", "5")) RAG_TOP_K = int(os.environ.get("RAG_TOP_K", "5"))
RAG_RELEVANCE_THRESHOLD = float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0")) RAG_RELEVANCE_THRESHOLD = float(os.environ.get("RAG_RELEVANCE_THRESHOLD", "0.0"))
ENABLE_RAG_HYBRID_SEARCH = (
os.environ.get("ENABLE_RAG_HYBRID_SEARCH", "").lower() == "true"
)
RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "") RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "")
RAG_EMBEDDING_MODEL = os.environ.get( RAG_EMBEDDING_MODEL = os.environ.get(

View file

@ -121,6 +121,7 @@ class RAGMiddleware(BaseHTTPMiddleware):
rag_app.state.RAG_TEMPLATE, rag_app.state.RAG_TEMPLATE,
rag_app.state.TOP_K, rag_app.state.TOP_K,
rag_app.state.RELEVANCE_THRESHOLD, rag_app.state.RELEVANCE_THRESHOLD,
rag_app.state.ENABLE_RAG_HYBRID_SEARCH,
rag_app.state.RAG_EMBEDDING_ENGINE, rag_app.state.RAG_EMBEDDING_ENGINE,
rag_app.state.RAG_EMBEDDING_MODEL, rag_app.state.RAG_EMBEDDING_MODEL,
rag_app.state.sentence_transformer_ef, rag_app.state.sentence_transformer_ef,

View file

@ -43,7 +43,8 @@
let querySettings = { let querySettings = {
template: '', template: '',
r: 0.0, r: 0.0,
k: 4 k: 4,
hybrid: false
}; };
const scanHandler = async () => { const scanHandler = async () => {
@ -174,6 +175,12 @@
} }
}; };
const toggleHybridSearch = async () => {
querySettings.hybrid = !querySettings.hybrid;
querySettings = await updateQuerySettings(localStorage.token, querySettings);
};
onMount(async () => { onMount(async () => {
const res = await getRAGConfig(localStorage.token); const res = await getRAGConfig(localStorage.token);
@ -202,6 +209,24 @@
<div> <div>
<div class=" mb-2 text-sm font-medium">{$i18n.t('General Settings')}</div> <div class=" mb-2 text-sm font-medium">{$i18n.t('General Settings')}</div>
<div class=" flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Hybrid Search')}</div>
<button
class="p-1 px-3 text-xs flex rounded transition"
on:click={() => {
toggleHybridSearch();
}}
type="button"
>
{#if querySettings.hybrid === true}
<span class="ml-2 self-center">{$i18n.t('On')}</span>
{:else}
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
{/if}
</button>
</div>
<div class=" flex w-full justify-between"> <div class=" flex w-full justify-between">
<div class=" self-center text-xs font-medium">{$i18n.t('Embedding Model Engine')}</div> <div class=" self-center text-xs font-medium">{$i18n.t('Embedding Model Engine')}</div>
<div class="flex items-center relative"> <div class="flex items-center relative">
@ -386,78 +411,74 @@
<hr class=" dark:border-gray-700 my-3" /> <hr class=" dark:border-gray-700 my-3" />
<div class=" "> {#if querySettings.hybrid === true}
<div class=" mb-2 text-sm font-medium">{$i18n.t('Update Reranking Model')}</div> <div class=" ">
<div class=" mb-2 text-sm font-medium">{$i18n.t('Update Reranking Model')}</div>
<div class="flex w-full"> <div class="flex w-full">
<div class="flex-1 mr-2"> <div class="flex-1 mr-2">
<input <input
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
placeholder={$i18n.t('Update reranking model (e.g. {{model}})', { placeholder={$i18n.t('Update reranking model (e.g. {{model}})', {
model: rerankingModel.slice(-40) model: rerankingModel.slice(-40)
})} })}
bind:value={rerankingModel} bind:value={rerankingModel}
/> />
</div> </div>
<button <button
class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition" class="px-2.5 bg-gray-100 hover:bg-gray-200 text-gray-800 dark:bg-gray-850 dark:hover:bg-gray-800 dark:text-gray-100 rounded-lg transition"
on:click={() => { on:click={() => {
rerankingModelUpdateHandler(); rerankingModelUpdateHandler();
}} }}
disabled={updateRerankingModelLoading} disabled={updateRerankingModelLoading}
> >
{#if updateRerankingModelLoading} {#if updateRerankingModelLoading}
<div class="self-center"> <div class="self-center">
<svg <svg
class=" w-4 h-4" class=" w-4 h-4"
viewBox="0 0 24 24" viewBox="0 0 24 24"
fill="currentColor" fill="currentColor"
xmlns="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg"
><style> ><style>
.spinner_ajPY { .spinner_ajPY {
transform-origin: center; transform-origin: center;
animation: spinner_AtaB 0.75s infinite linear; animation: spinner_AtaB 0.75s infinite linear;
}
@keyframes spinner_AtaB {
100% {
transform: rotate(360deg);
} }
} @keyframes spinner_AtaB {
</style><path 100% {
d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z" transform: rotate(360deg);
opacity=".25" }
/><path }
d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z" </style><path
class="spinner_ajPY" d="M12,1A11,11,0,1,0,23,12,11,11,0,0,0,12,1Zm0,19a8,8,0,1,1,8-8A8,8,0,0,1,12,20Z"
/></svg opacity=".25"
/><path
d="M10.14,1.16a11,11,0,0,0-9,8.92A1.59,1.59,0,0,0,2.46,12,1.52,1.52,0,0,0,4.11,10.7a8,8,0,0,1,6.66-6.61A1.42,1.42,0,0,0,12,2.69h0A1.57,1.57,0,0,0,10.14,1.16Z"
class="spinner_ajPY"
/></svg
>
</div>
{:else}
<svg
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 16 16"
fill="currentColor"
class="w-4 h-4"
> >
</div> <path
{:else} d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z"
<svg />
xmlns="http://www.w3.org/2000/svg" <path
viewBox="0 0 16 16" d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
fill="currentColor" />
class="w-4 h-4" </svg>
> {/if}
<path </button>
d="M8.75 2.75a.75.75 0 0 0-1.5 0v5.69L5.03 6.22a.75.75 0 0 0-1.06 1.06l3.5 3.5a.75.75 0 0 0 1.06 0l3.5-3.5a.75.75 0 0 0-1.06-1.06L8.75 8.44V2.75Z" </div>
/>
<path
d="M3.5 9.75a.75.75 0 0 0-1.5 0v1.5A2.75 2.75 0 0 0 4.75 14h6.5A2.75 2.75 0 0 0 14 11.25v-1.5a.75.75 0 0 0-1.5 0v1.5c0 .69-.56 1.25-1.25 1.25h-6.5c-.69 0-1.25-.56-1.25-1.25v-1.5Z"
/>
</svg>
{/if}
</button>
</div> </div>
</div>
<div class="mt-2 mb-1 text-xs text-gray-400 dark:text-gray-500"> <hr class=" dark:border-gray-700 my-3" />
{$i18n.t( {/if}
'Note: If you choose a reranking model, it will use that to score and rerank instead of the embedding model.'
)}
</div>
<hr class=" dark:border-gray-700 my-3" />
<div class=" flex w-full justify-between"> <div class=" flex w-full justify-between">
<div class=" self-center text-xs font-medium"> <div class=" self-center text-xs font-medium">
@ -583,25 +604,27 @@
</div> </div>
</div> </div>
<div class=" flex"> {#if querySettings.hybrid === true}
<div class=" flex w-full justify-between"> <div class=" flex">
<div class="self-center text-xs font-medium flex-1"> <div class=" flex w-full justify-between">
{$i18n.t('Relevance Threshold')} <div class="self-center text-xs font-medium flex-1">
</div> {$i18n.t('Relevance Threshold')}
</div>
<div class="self-center p-3"> <div class="self-center p-3">
<input <input
class=" w-full rounded-lg py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none" class=" w-full rounded-lg py-1.5 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
type="number" type="number"
step="0.01" step="0.01"
placeholder={$i18n.t('Enter Relevance Threshold')} placeholder={$i18n.t('Enter Relevance Threshold')}
bind:value={querySettings.r} bind:value={querySettings.r}
autocomplete="off" autocomplete="off"
min="0.0" min="0.0"
/> />
</div>
</div> </div>
</div> </div>
</div> {/if}
<div> <div>
<div class=" mb-2.5 text-sm font-medium">{$i18n.t('RAG Template')}</div> <div class=" mb-2.5 text-sm font-medium">{$i18n.t('RAG Template')}</div>