import logging
import operator
from typing import Any, List, Optional, Sequence

import requests

from apps.ollama.main import (
    generate_ollama_embeddings,
    GenerateEmbeddingsForm,
)

from langchain_core.documents import BaseDocumentCompressor, Document
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import (
    ContextualCompressionRetriever,
    EnsembleRetriever,
)

from config import SRC_LOG_LEVELS, CHROMA_CLIENT

from langchain_core.retrievers import BaseRetriever
from langchain_core.callbacks import CallbackManagerForRetrieverRun, Callbacks
from langchain_core.pydantic_v1 import Extra
from sentence_transformers import util

log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])


def query_embeddings_doc(
    collection_name: str,
    query: str,
    k: int,
    r: float,
    embeddings_function,
    reranking_function,
):
    """Query one Chroma collection with a BM25 + vector ensemble, then rerank.

    Args:
        collection_name: Name of the collection fetched from ``CHROMA_CLIENT``.
        query: The search text.
        k: Number of results requested from each retriever and kept after
            reranking.
        r: Minimum score a document must reach to survive reranking (a falsy
            value disables the filter, see ``RerankCompressor``).
        embeddings_function: Callable mapping a string (or list of strings)
            to embedding(s).
        reranking_function: Optional object exposing ``predict``; when falsy,
            cosine similarity of embeddings is used for scoring.

    Returns:
        A dict with the keys ``"distances"``, ``"documents"`` and
        ``"metadatas"``, each holding a single nested list. Note that
        ``"distances"`` carries the reranker *scores* stored in each
        document's metadata.

    Raises:
        Any exception raised by Chroma or the retrievers propagates unchanged.
    """
    collection = CHROMA_CLIENT.get_collection(name=collection_name)
    # BM25 is built over the raw texts, so the whole collection is loaded.
    documents = collection.get()

    bm25_retriever = BM25Retriever.from_texts(
        texts=documents.get("documents"),
        metadatas=documents.get("metadatas"),
    )
    bm25_retriever.k = k

    chroma_retriever = ChromaRetriever(
        collection=collection,
        embeddings_function=embeddings_function,
        top_n=k,
    )

    # Keyword and vector retrieval are weighted equally.
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, chroma_retriever], weights=[0.5, 0.5]
    )

    compressor = RerankCompressor(
        embeddings_function=embeddings_function,
        reranking_function=reranking_function,
        r_score=r,
        top_n=k,
    )

    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=ensemble_retriever
    )

    reranked = compression_retriever.invoke(query)
    return {
        "distances": [[d.metadata.get("score") for d in reranked]],
        "documents": [[d.page_content for d in reranked]],
        "metadatas": [[d.metadata for d in reranked]],
    }


def merge_and_sort_query_results(query_results, k, reverse=False):
    """Merge several query results into one, sorted and truncated to ``k``.

    Args:
        query_results: Iterable of dicts shaped like the return value of
            ``query_embeddings_doc`` (``"distances"``, ``"documents"``,
            ``"metadatas"``, each a list whose first element is the payload).
        k: Maximum number of entries to keep.
        reverse: Sort descending instead of ascending. Use ``True`` when the
            ``"distances"`` are really relevance scores (higher is better).
            Defaults to ``False`` (true distances, lower is better).

    Returns:
        A dict with the same three keys, each wrapping a single list of at
        most ``k`` entries. All lists are empty when there is nothing to
        merge.
    """
    # Collect (distance, document, metadata) triples from every result.
    combined = []
    for data in query_results:
        combined.extend(
            zip(data["distances"][0], data["documents"][0], data["metadatas"][0])
        )

    # Sort on the distance only; documents/metadatas are never compared.
    combined.sort(key=operator.itemgetter(0), reverse=reverse)
    top = combined[:k]

    return {
        "distances": [[distance for distance, _, _ in top]],
        "documents": [[document for _, document, _ in top]],
        "metadatas": [[metadata for _, _, metadata in top]],
    }


def query_embeddings_collection(
    collection_names: List[str],
    query: str,
    k: int,
    r: float,
    embeddings_function,
    reranking_function,
):
    """Query several collections and merge their best results.

    Each collection is queried with ``query_embeddings_doc``. A collection
    that fails is logged and skipped (best effort) so that one broken
    collection does not discard the others.

    Returns:
        The merged result dict produced by ``merge_and_sort_query_results``.
    """
    results = []
    for collection_name in collection_names:
        try:
            results.append(
                query_embeddings_doc(
                    collection_name=collection_name,
                    query=query,
                    k=k,
                    r=r,
                    embeddings_function=embeddings_function,
                    reranking_function=reranking_function,
                )
            )
        except Exception as e:
            # Deliberately best-effort, but never silent.
            log.exception(e)

    # query_embeddings_doc reports reranker scores (higher is better), so the
    # merged list must be sorted descending to keep the most relevant k.
    return merge_and_sort_query_results(results, k, reverse=True)


def rag_template(template: str, context: str, query: str):
    """Fill the ``[context]`` and ``[query]`` placeholders of ``template``."""
    template = template.replace("[context]", context)
    template = template.replace("[query]", query)
    return template


def query_embeddings_function(
    embedding_engine,
    embedding_model,
    embedding_function,
    openai_key,
    openai_url,
):
    """Build the callable used to embed a query (or a list of texts).

    Args:
        embedding_engine: ``""`` to use the local ``embedding_function``,
            or ``"ollama"`` / ``"openai"`` for the remote engines.
        embedding_model: Model name forwarded to the remote engine.
        embedding_function: Local model exposing ``encode``; only used when
            ``embedding_engine`` is ``""``.
        openai_key: API key for the OpenAI engine.
        openai_url: Base URL for the OpenAI engine.

    Returns:
        A callable accepting a string or a list of strings, or ``None`` when
        ``embedding_engine`` is not one of the supported values.
    """
    if embedding_engine == "":
        return lambda query: embedding_function.encode(query).tolist()

    if embedding_engine == "ollama":

        def embed_one(text):
            return generate_ollama_embeddings(
                GenerateEmbeddingsForm(
                    **{
                        "model": embedding_model,
                        "prompt": text,
                    }
                )
            )

    elif embedding_engine == "openai":

        def embed_one(text):
            return generate_openai_embeddings(
                model=embedding_model,
                text=text,
                key=openai_key,
                url=openai_url,
            )

    else:
        # Unknown engine: same result as before (implicit None), now explicit.
        return None

    def embed(query):
        # The remote engines embed one text per call; fan out over lists.
        if isinstance(query, list):
            return [embed_one(q) for q in query]
        return embed_one(query)

    return embed


def rag_messages(
    docs,
    messages,
    template,
    k,
    r,
    embedding_engine,
    embedding_model,
    embedding_function,
    reranking_function,
    openai_key,
    openai_url,
):
    """Inject retrieved context into the last user message of ``messages``.

    The text of the last user message is used as the query. Every entry of
    ``docs`` is resolved to a context (raw text, a multi-collection query or
    a single-collection query), the contexts are rendered through
    ``template`` and the result replaces the user message's text.

    Args:
        docs: List of dicts with a ``"type"`` key plus ``"collection_name"``
            or ``"collection_names"`` (and ``"content"`` for type ``"text"``).
        messages: Chat messages (dicts with ``"role"`` and ``"content"``).
            The list is modified in place.
        template: RAG template containing ``[context]`` / ``[query]``.
        k: Number of chunks to retrieve.
        r: Minimum relevance score.
        embedding_engine, embedding_model, embedding_function, openai_key,
        openai_url: Forwarded to ``query_embeddings_function``.
        reranking_function: Forwarded to the retrieval functions.

    Returns:
        ``messages``, with the last user message rewritten. When there is no
        user message the list is returned untouched.
    """
    # The API key is intentionally left out of the log line.
    log.debug(
        "docs: %s %s %s %s %s %s %s",
        docs,
        messages,
        embedding_engine,
        embedding_model,
        embedding_function,
        reranking_function,
        openai_url,
    )

    last_user_message_idx = None
    for i in range(len(messages) - 1, -1, -1):
        if messages[i]["role"] == "user":
            last_user_message_idx = i
            break

    if last_user_message_idx is None:
        # Nothing to augment; indexing with None would raise TypeError.
        log.debug("no user message found, skipping RAG")
        return messages

    user_message = messages[last_user_message_idx]

    if isinstance(user_message["content"], list):
        # Multi-part content: the first text part is the query.
        content_type = "list"
        query = ""
        for content_item in user_message["content"]:
            if content_item["type"] == "text":
                query = content_item["text"]
                break
    elif isinstance(user_message["content"], str):
        content_type = "text"
        query = user_message["content"]
    else:
        # Fallback in case the input does not match expected types
        content_type = None
        query = ""

    embeddings_function = query_embeddings_function(
        embedding_engine,
        embedding_model,
        embedding_function,
        openai_key,
        openai_url,
    )

    extracted_collections = []
    relevant_contexts = []

    for doc in docs:
        context = None

        collection = doc.get("collection_name")
        if collection:
            collection = [collection]
        else:
            collection = doc.get("collection_names", [])

        # Only collections that were not handled by an earlier doc remain.
        collection = set(collection).difference(extracted_collections)
        if not collection:
            log.debug(f"skipping {doc} as it has already been extracted")
            continue

        try:
            if doc["type"] == "text":
                context = doc["content"]
            elif doc["type"] == "collection":
                context = query_embeddings_collection(
                    collection_names=doc["collection_names"],
                    query=query,
                    k=k,
                    r=r,
                    embeddings_function=embeddings_function,
                    reranking_function=reranking_function,
                )
            else:
                context = query_embeddings_doc(
                    collection_name=doc["collection_name"],
                    query=query,
                    k=k,
                    r=r,
                    embeddings_function=embeddings_function,
                    reranking_function=reranking_function,
                )
        except Exception as e:
            log.exception(e)
            context = None

        if context:
            relevant_contexts.append(context)

        # NOTE(review): the collapsed source does not show whether this line
        # sits inside the `if context:` above; it is placed outside so that
        # failed collections are also marked as handled -- confirm.
        extracted_collections.extend(collection)

    log.debug(f"relevant_contexts: {relevant_contexts}")

    # "text" docs contribute a plain string, queries contribute a result
    # dict; every chunk is separated by a blank line.
    context_parts = []
    for context in relevant_contexts:
        if isinstance(context, str):
            context_parts.append(context)
        else:
            context_parts.extend(context["documents"][0])
    context_string = "\n\n".join(context_parts).strip()

    ra_content = rag_template(
        template=template,
        context=context_string,
        query=query,
    )

    log.debug(f"ra_content: {ra_content}")

    if content_type == "list":
        new_content = []
        for content_item in user_message["content"]:
            if content_item["type"] == "text":
                # Update the text item's content with ra_content
                new_content.append({"type": "text", "text": ra_content})
            else:
                # Keep other types of content as they are
                new_content.append(content_item)
        new_user_message = {**user_message, "content": new_content}
    else:
        new_user_message = {
            **user_message,
            "content": ra_content,
        }

    messages[last_user_message_idx] = new_user_message

    return messages


def generate_openai_embeddings(
    model: str, text: str, key: str, url: str = "https://api.openai.com/v1"
):
    """Request an embedding for ``text`` from an OpenAI-compatible API.

    Args:
        model: Embedding model name.
        text: Input sent as the ``"input"`` field.
        key: Bearer token for the ``Authorization`` header.
        url: API base URL; ``/embeddings`` is appended.

    Returns:
        The first embedding of the response, or ``None`` when the request
        fails or the response has no ``"data"`` field (the error is logged).
    """
    try:
        r = requests.post(
            f"{url}/embeddings",
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {key}",
            },
            json={"input": text, "model": model},
        )
        r.raise_for_status()
        data = r.json()
        if "data" in data:
            return data["data"][0]["embedding"]
        # Raising a bare string is itself a TypeError in Python 3.
        raise Exception("Something went wrong :/")
    except Exception as e:
        log.exception(e)
        return None


class ChromaRetriever(BaseRetriever):
    """LangChain retriever backed by a Chroma collection."""

    # Collection object exposing ``query(query_embeddings=..., n_results=...)``.
    collection: Any
    # Callable turning the query string into an embedding.
    embeddings_function: Any
    # Number of results requested from the collection.
    top_n: int

    def _get_relevant_documents(
        self,
        query: str,
        *,
        run_manager: CallbackManagerForRetrieverRun,
    ) -> List[Document]:
        """Embed ``query`` and return the collection's nearest documents."""
        query_embeddings = self.embeddings_function(query)

        results = self.collection.query(
            query_embeddings=[query_embeddings],
            n_results=self.top_n,
        )

        # A single query was sent, so only the first result row is relevant.
        metadatas = results["metadatas"][0]
        documents = results["documents"][0]

        return [
            Document(
                metadata=metadata,
                page_content=document,
            )
            for metadata, document in zip(metadatas, documents)
        ]


class RerankCompressor(BaseDocumentCompressor):
    """Document compressor that scores, filters and truncates documents."""

    # Callable embedding a string or a list of strings (fallback scoring).
    embeddings_function: Any
    # Optional object exposing ``predict`` over (query, text) pairs.
    reranking_function: Any
    # Minimum score to keep a document; a falsy value disables the filter.
    r_score: float
    # Maximum number of documents returned.
    top_n: int

    class Config:
        extra = Extra.forbid
        arbitrary_types_allowed = True

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """Return the ``top_n`` best-scoring documents for ``query``.

        Scores come from ``reranking_function.predict`` when a reranking
        function is set, otherwise from the cosine similarity between the
        query embedding and the document embeddings. Each returned document
        is a new ``Document`` whose metadata has the score under ``"score"``.
        """
        if not documents:
            # Nothing to score; avoids calling the models with empty input.
            return []

        if self.reranking_function:
            scores = self.reranking_function.predict(
                [(query, doc.page_content) for doc in documents]
            )
        else:
            query_embedding = self.embeddings_function(query)
            document_embedding = self.embeddings_function(
                [doc.page_content for doc in documents]
            )
            # One query row against all documents.
            scores = util.cos_sim(query_embedding, document_embedding)[0]

        docs_with_scores = list(zip(documents, scores.tolist()))
        if self.r_score:
            docs_with_scores = [
                (d, s) for d, s in docs_with_scores if s >= self.r_score
            ]

        ranked = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)

        # Copy the metadata so the caller's documents are not mutated.
        return [
            Document(
                page_content=doc.page_content,
                metadata={**doc.metadata, "score": doc_score},
            )
            for doc, doc_score in ranked[: self.top_n]
        ]