feat: doc tagging

This commit is contained in:
Timothy J. Baek 2024-02-03 14:44:49 -08:00
parent 8fd1b62e04
commit 00803c92f2
10 changed files with 344 additions and 108 deletions

View file

@@ -128,6 +128,51 @@ class QueryCollectionsForm(BaseModel):
k: Optional[int] = 4
def merge_and_sort_query_results(query_results, k):
    """Merge several query results into one, keeping the k smallest distances.

    Args:
        query_results: Iterable of dicts, each providing "ids", "distances",
            "metadatas" and "documents". Only the first element of each value
            (``data[key][0]``) is read.
        k: Maximum number of hits to keep; ``None`` keeps every hit.

    Returns:
        A dict whose "ids", "distances", "metadatas" and "documents" entries
        are single-element lists wrapping the merged hits in ascending
        distance order, with "embeddings", "uris" and "data" set to ``None``.
        When there are no hits at all, the four wrapped lists are empty.
    """
    combined_ids = []
    combined_distances = []
    combined_metadatas = []
    combined_documents = []

    # Flatten the first result row of every input into shared columns.
    for data in query_results:
        combined_ids.extend(data["ids"][0])
        combined_distances.extend(data["distances"][0])
        combined_metadatas.extend(data["metadatas"][0])
        combined_documents.extend(data["documents"][0])

    # Keep each hit's fields together as (distance, id, metadata, document)
    # while ordering. Sorting on the distance alone avoids comparing metadata
    # dicts on ties, and the sort is stable so ties keep their input order.
    ranked = sorted(
        zip(combined_distances, combined_ids, combined_metadatas, combined_documents),
        key=lambda hit: hit[0],
    )[:k]

    # Build every column straight from the ranked hits. Unpacking
    # ``zip(*ranked)`` into four names raises ValueError when nothing matched.
    return {
        "ids": [[hit[1] for hit in ranked]],
        "distances": [[hit[0] for hit in ranked]],
        "metadatas": [[hit[2] for hit in ranked]],
        "documents": [[hit[3] for hit in ranked]],
        "embeddings": None,
        "uris": None,
        "data": None,
    }
@app.post("/query/collections")
def query_collections(
form_data: QueryCollectionsForm,
@@ -147,7 +192,7 @@ def query_collections(
except:
pass
return results
return merge_and_sort_query_results(results, form_data.k)
@app.post("/web")