debeir.utils.scaler

 1from typing import Dict
 2
 3
 4def unpack_elasticsearch_scores(results) -> Dict:
 5    """
 6    Helper function to retrieve the top score of documents for each topic.
 7    Used in NIR weight adjustment calculation.
 8
 9    :param results: Raw input of results from Elasticsearch library
10    :return:
11        Returns a 1-D dictionary of {topic_num: top_score} pairs.
12    """
13    scores = {}
14    if isinstance(results[0][0], list):
15        results = results[0]
16
17    for raw_result in results:
18        if raw_result is None:
19            continue
20
21        topic_num, result = raw_result
22        for res in result["hits"]["hits"]:
23            score = float(res["_score"])
24
25            scores[topic_num] = score
26
27    return scores
28
29
def get_z_value(cosine_ceiling, bm25_ceiling) -> float:
    """
    Analytical solution for the normalization constant, z, used in NIR log normalization.

    z is the log base for which ``log(bm25_ceiling) / log(z)`` equals ``cosine_ceiling``,
    i.e. ``z = bm25_ceiling ** (1 / cosine_ceiling)``.

    :param cosine_ceiling: The highest theoretical additive cosine score
    :param bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
    :return:
        The normalization parameter for NIR log normalization.
    :raises AssertionError: If bm25_ceiling is not strictly greater than cosine_ceiling.
    """

    # Raise explicitly rather than using an ``assert`` statement: asserts are removed when
    # Python runs with -O, which would let invalid ceilings through silently. The exception
    # type and message are kept so existing callers observe the same failure.
    if not bm25_ceiling > cosine_ceiling:
        raise AssertionError("BM25 Ceiling cannot be lower than the cosine ceiling.")

    return bm25_ceiling ** (1 / float(cosine_ceiling))
43
44# class Scaler:
45#    def __init__(self, gold_standard, qwt, cwt):
46#        self.scores = []
47#
48#    def get_norm_weight_by_query(self, qid, estimate_ceiling=False):
49#        return self.get_norm_weight(self.qwt, self.cwt, bm25_ceiling=self.scores[int(qid) - 1],
50#                                    estimate_ceiling=estimate_ceiling)
51#
52#    @classmethod
53#    def get_norm_weight(cls, qwt, cwt, bm25_ceiling=100, estimate_ceiling=False):
54#        qw_len = len(qwt.get_all_weights())
55#        qw_non_zero = len(list(filter(lambda k: k > 0, qwt.get_all_weights())))
56#
57#        if estimate_ceiling:
58#            bm25_ceiling = qw_non_zero / qw_len * bm25_ceiling
59#
60#        cosine_ceiling = len(list(filter(lambda k: k > 0, cwt.get_all_weights())))
61#
62#        # Analytical solution for getting log base:
63#        # n_score - log(bm25_score)/log(x) = 0
64#        # Solve for x
65#        return bm25_ceiling ** (1 / float(cosine_ceiling))
66#
def unpack_elasticsearch_scores(results) -> Dict:
    """
    Helper function to retrieve the top score of documents for each topic.
    Used in NIR weight adjustment calculation.

    :param results: Raw input of results from Elasticsearch library. Each entry is either
        ``None`` (skipped) or a ``(topic_num, response)`` pair whose hits are read from
        ``response["hits"]["hits"]``. When the pairs are lists wrapped in one extra outer
        list, that outer list is unwrapped first.
    :return:
        Returns a 1-D dictionary of {topic_num: top_score} pairs. Topics whose response
        contains no hits are left out of the dictionary.
    """
    scores = {}
    if not results:
        # Nothing to unpack; indexing results[0] below would fail on empty input.
        return scores

    # Unwrap one level of nesting. A leading None entry cannot be indexed, so it is
    # left for the loop below to skip.
    first = results[0]
    if first is not None and isinstance(first[0], list):
        results = first

    for raw_result in results:
        if raw_result is None:
            continue

        topic_num, result = raw_result
        hit_scores = [float(hit["_score"]) for hit in result["hits"]["hits"]]
        if hit_scores:
            # Take the maximum explicitly: assigning the entry once per hit would keep
            # the score of the last hit instead of the top one.
            scores[topic_num] = max(hit_scores)

    return scores

Helper function to retrieve the top score of documents for each topic. Used in NIR weight adjustment calculation.

Parameters
  • results: Raw input of results from Elasticsearch library
Returns
Returns a 1-D dictionary of {topic_num: top_score} pairs.
def get_z_value(cosine_ceiling, bm25_ceiling) -> float:
    """
    Analytical solution for the normalization constant, z, used in NIR log normalization.

    z is the log base for which ``log(bm25_ceiling) / log(z)`` equals ``cosine_ceiling``,
    i.e. ``z = bm25_ceiling ** (1 / cosine_ceiling)``.

    :param cosine_ceiling: The highest theoretical additive cosine score
    :param bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
    :return:
        The normalization parameter for NIR log normalization.
    :raises AssertionError: If bm25_ceiling is not strictly greater than cosine_ceiling.
    """

    # Raise explicitly rather than using an ``assert`` statement: asserts are removed when
    # Python runs with -O, which would let invalid ceilings through silently. The exception
    # type and message are kept so existing callers observe the same failure.
    if not bm25_ceiling > cosine_ceiling:
        raise AssertionError("BM25 Ceiling cannot be lower than the cosine ceiling.")

    return bm25_ceiling ** (1 / float(cosine_ceiling))

Analytical solution for the normalization constant, z, used in NIR log normalization.

Parameters
  • cosine_ceiling: The highest theoretical additive cosine score
  • bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
Returns
The normalization parameter for NIR log normalization.