# evaluate.py
"""Index the BEIR TREC-COVID corpus into a local search service and score
retrieval quality with ranx (nDCG / recall / precision / MRR / MAP)."""

import csv
import json
import uuid

import requests
from ranx import Qrels, Run, evaluate

# -- CONFIG -------------------------------------
INDEX_URL = "http://localhost:8082/sandi/index"
SEARCH_URL = "http://localhost:8081/sandi/search"
CLIENT_ID = "BEIR_TREC_COVID"
DATASET_DIR = "datasets/trec-covid-beir"
BATCH_SIZE = 100
TOP_K = 1000
# -----------------------------------------------


def load_corpus(path):
    """Load BEIR corpus.jsonl into {doc_id: {"title": str, "text": str}}.

    Missing title/text fields default to "" so indexing never KeyErrors.
    """
    corpus = {}
    # Explicit UTF-8: BEIR ships UTF-8 JSONL; don't depend on the locale.
    with open(f"{path}/corpus.jsonl", encoding="utf-8") as f:
        for line in f:
            doc = json.loads(line)
            corpus[doc["_id"]] = {
                "title": doc.get("title", ""),
                "text": doc.get("text", ""),
            }
    return corpus


def load_queries(path):
    """Load BEIR queries.jsonl into {query_id: query_text}."""
    queries = {}
    with open(f"{path}/queries.jsonl", encoding="utf-8") as f:
        for line in f:
            q = json.loads(line)
            queries[q["_id"]] = q["text"]
    return queries


def load_qrels(path, split="test"):
    """Load BEIR qrels/<split>.tsv into {query_id: {doc_id: relevance}}.

    Rows with score <= 0 (explicit non-relevance judgements) are dropped so
    ranx only sees positive relevance labels.
    """
    qrels = {}
    with open(f"{path}/qrels/{split}.tsv", encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter="\t")
        for row in reader:
            q_id = row["query-id"]
            doc_id = row["corpus-id"]
            score = int(row["score"])
            if score > 0:
                qrels.setdefault(q_id, {})[doc_id] = score
    return qrels


def index_corpus(corpus):
    """POST the whole corpus to INDEX_URL in BATCH_SIZE chunks.

    Prints running progress; raises on any non-2xx response so a lost batch
    cannot silently skew the evaluation.
    """
    docs = list(corpus.items())
    indexed = 0
    for i in range(0, len(docs), BATCH_SIZE):
        batch = docs[i:i + BATCH_SIZE]
        data = [{"id": did, "title": d["title"], "text": d["text"]} for did, d in batch]
        payload = {"requestId": str(uuid.uuid4()), "clientId": CLIENT_ID, "data": data}
        resp = requests.post(INDEX_URL, json=payload, timeout=60)
        resp.raise_for_status()
        # Trust the service's own count of accepted documents.
        indexed += resp.json().get("indexed", 0)
        print(f" Indexed {indexed}/{len(docs)}...")
    print(f"Done. {indexed} documents indexed.")


def search_query(query_text):
    """Run one query against SEARCH_URL and return {doc_id: score} for ranx.

    The service's native scores are discarded: result rank is mapped to a
    strictly decreasing 1/(rank+1) score, so ranx reproduces exactly the
    ordering the service returned. Hits without an "id" field are skipped.
    """
    payload = {
        "requestId": str(uuid.uuid4()),
        "clientId": CLIENT_ID,
        "searchQuery": query_text,
        "pageSize": TOP_K,
        "resultFields": "id,*",
        "synonyms": True,
        "legacy": False,
        "rerank": True,
    }
    resp = requests.post(SEARCH_URL, json=payload, timeout=30)
    resp.raise_for_status()
    results = resp.json().get("results", [])
    scores = {
        doc["id"]: 1.0 / (rank + 1)
        for rank, doc in enumerate(results)
        if doc.get("id")
    }
    return scores


def main():
    """Load the dataset, index it, run every query, and print ranx metrics."""
    print("Loading dataset...")
    corpus = load_corpus(DATASET_DIR)
    queries = load_queries(DATASET_DIR)
    qrels = load_qrels(DATASET_DIR)
    print(f" Corpus: {len(corpus)}, Queries: {len(queries)}, Qrels: {len(qrels)}")

    print("\nIndexing...")
    index_corpus(corpus)

    print(f"\nSearching {len(queries)} queries...")
    run = {}
    for i, (q_id, q_text) in enumerate(queries.items()):
        run[q_id] = search_query(q_text)
        if (i + 1) % 100 == 0:
            print(f" {i+1}/{len(queries)}")

    print("\nEvaluating...")
    qrels_obj = Qrels(qrels)
    run_obj = Run(run)
    # make_comparable restricts scoring to queries present in both qrels and run.
    metrics = evaluate(
        qrels_obj,
        run_obj,
        ["ndcg@5", "ndcg@10", "recall@10", "recall@100",
         "precision@10", "mrr@10", "map@10"],
        make_comparable=True,
    )
    print("\n-- Results --")
    for k, v in metrics.items():
        print(f" {k}: {v:.4f}")


if __name__ == "__main__":
    main()