Are you tired of implementing, training, and running baselines?
1 from ranx import Qrels, Run, compare
2
3 qrels = Qrels(...)
4 my_run = Run(...)
5
6 # Download pre-computed runs from RanxHub
7 bm25_run = Run.from_ranxhub("bm25-run-id")
8 bert_run = Run.from_ranxhub("bert-run-id")
9
10 compare(
11 qrels=qrels,
12 runs=[bm25_run, bert_run, my_run],
13 metrics=["map@100", "mrr@10", "ndcg@10"],
14 )
15
16 # Model MAP@100 MRR@10 NDCG@10
17 --- ------ -------- -------- --------
18 a bm25 0.233 0.234 0.239
19 b bert 0.366ᵃ 0.367ᵃ 0.408ᵃ
20 c my_run 0.405ᵃᵇ 0.406ᵃᵇ 0.451ᵃᵇ