debeir.evaluation.residual_scoring

import os
import subprocess
import tempfile
import uuid
from typing import Dict, List, Union

from debeir.evaluation.evaluator import Evaluator


# Remove all documents that exist in the training set
# Evaluate on remaining
# Normalize for result set length, cut off at ????


class ResidualEvaluator(Evaluator):
    """ Residual scoring is the scoring of a subset of documents, the residual, which is created by
    removing documents from the collection and qrels.
    """

    def __init__(self, qrels: str, metrics: List[str], filter_ids: Dict[str, List[str]]):
        """
        Args:
            qrels (str): Path to the qrels file
            metrics (List[str]): A list of metrics with depth, e.g. NDCG@1000
            filter_ids (Dict[str, List[str]]): IDs to remove from the collection, given as Dict[Topic_num, [Docids]]
        """
        super().__init__(qrels, metrics)
        self.qrels_fp = qrels
        self.filter_ids = filter_ids

    def _filter_run(self, res: str):
        """Write a filtered copy of the run file, dropping every document listed in filter_ids."""
        if self.filter_ids is None:
            return res

        tmpdir = tempfile.mkdtemp()
        tmpfp = os.path.join(tmpdir, str(uuid.uuid4()))

        with open(res) as run_file, open(tmpfp, 'w') as writer:
            for line in run_file:
                # TREC run format: topic_num Q0 doc_id rank score run_name
                topic_num, _, doc_id, _, _, _ = line.split()
                # Topics without a filter entry are kept unchanged
                if doc_id in self.filter_ids.get(topic_num, []):
                    continue

                writer.write(line)

        return tmpfp

    def evaluate_runs(self, res: Union[str, List[str]], with_trec_binary=False, **kwargs):
        """ Run the residual evaluation for the runs

        :param res: The results to run the evaluator against
        :param with_trec_binary: Use the TREC C binary instead of the default Python library, defaults to False
        :return: A dictionary of the supplied metrics for the results evaluated against the qrels
        """
        if with_trec_binary:
            return self._evaluate_with_binary(res, **kwargs)

        fp = self._filter_run(res)

        return super().evaluate_runs(fp, **kwargs)

    def _evaluate_with_binary(self, res, **kwargs):
        """Filter the run, score it with the trec_eval binary and parse the output into a dict."""
        fp = self._filter_run(res)

        output = subprocess.check_output(["trec_eval", self.qrels_fp, fp]).decode()

        metrics = {}

        for line in output.split("\n"):
            # trec_eval prints lines of the form: metric <tab> topic <tab> value
            try:
                metric, _, value = line.split()
                metrics[metric] = value
            except ValueError:
                continue

        return metrics
class ResidualEvaluator(debeir.evaluation.evaluator.Evaluator):

Residual scoring is the scoring of a subset of documents, the residual, which is created by removing documents from the collection and qrels.
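The residual is easiest to picture on a TREC-format run file, where each line is topic_num Q0 doc_id rank score run_name: for each topic, any document listed in filter_ids is dropped before scoring, so only unseen documents contribute to the metrics. A minimal standalone sketch of that filtering step, using hypothetical topic and document IDs:

run_lines = [
    "1 Q0 NCT00001 1 12.5 my_run",
    "1 Q0 NCT00002 2 11.9 my_run",
    "2 Q0 NCT00003 1 10.2 my_run",
]
# Hypothetical training-set documents to remove, keyed by topic number
filter_ids = {"1": ["NCT00002"], "2": []}

# Keep only documents that are not in the per-topic filter list
residual = [
    line for line in run_lines
    if line.split()[2] not in filter_ids.get(line.split()[0], [])
]
# residual now excludes the NCT00002 line for topic 1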

ResidualEvaluator(qrels: str, metrics: List[str], filter_ids: Dict[str, List[str]])

Parameters
  • qrels (str): Path to the qrels file
  • metrics (List[str]): A list of metrics with depth, e.g. NDCG@1000
  • filter_ids (Dict[str, List[str]]): IDs to remove from the collection, given as Dict[Topic_num, [Docids]]
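Constructing the evaluator therefore needs only the qrels path, the metric list and the per-topic filter dictionary. A hedged usage sketch; the file path, topic numbers and document IDs below are hypothetical:

from debeir.evaluation.residual_scoring import ResidualEvaluator

# Hypothetical: documents seen during training, keyed by topic number
filter_ids = {
    "1": ["NCT00001", "NCT00002"],
    "2": ["NCT00017"],
}

evaluator = ResidualEvaluator(
    qrels="data/qrels.txt",   # hypothetical path to the qrels file
    metrics=["NDCG@1000"],    # metrics with depth, as in the docstring
    filter_ids=filter_ids,
)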

def evaluate_runs(self, res: Union[str, List[str]], with_trec_binary=False, **kwargs):

Run the residual evaluation for the runs

Parameters
  • res: The results to run the evaluator against
  • with_trec_binary: Use the TREC C binary instead of the default Python library, defaults to False
Returns

A dictionary of the supplied metrics, computed for the results against the qrels
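A short sketch of calling it both ways, continuing the hypothetical evaluator above; res is a path to a TREC-format run file, and the second call assumes the trec_eval binary is available on the PATH:

# Default path: filter the run, then score it with the parent Evaluator (Python library)
results = evaluator.evaluate_runs("runs/my_run.txt")

# Alternative: shell out to the trec_eval C binary and parse its output into a dict
results_binary = evaluator.evaluate_runs("runs/my_run.txt", with_trec_binary=True)

print(results)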