debeir.utils.scaler
from typing import Dict


def unpack_elasticsearch_scores(results) -> Dict:
    """
    Helper function to retrieve the top score of documents for each topic.
    Used in NIR weight adjustment calculation.

    :param results: Raw input of results from Elasticsearch library. Each entry is
        either ``None`` (skipped) or a ``(topic_num, response)`` pair whose response
        exposes ``response["hits"]["hits"]``; the whole sequence may be wrapped in
        one extra outer list.
    :return:
        Returns a 1-D dictionary of {topic_num: top_score} pairs. Topics without
        any scored hit are left out.
    """
    scores = {}
    if not results:
        return scores

    # The batch may arrive wrapped in one extra outer list; unwrap it.
    first = results[0]
    if first and isinstance(first[0], list):
        results = first

    for raw_result in results:
        if raw_result is None:
            continue

        topic_num, result = raw_result
        # Take the maximum explicitly rather than relying on hit ordering, and
        # ignore hits that carry no score (``_score`` of None).
        hit_scores = [
            float(hit["_score"])
            for hit in result["hits"]["hits"]
            if hit["_score"] is not None
        ]
        if hit_scores:
            scores[topic_num] = max(hit_scores)

    return scores


def get_z_value(cosine_ceiling, bm25_ceiling) -> float:
    """
    Analytical solution for the normalization constant, z, used in NIR log normalization.

    z is the log base for which ``log_z(bm25_ceiling) == cosine_ceiling``, i.e.
    ``z = bm25_ceiling ** (1 / cosine_ceiling)``.

    :param cosine_ceiling: The highest theoretical additive cosine score
    :param bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
    :return:
        The normalization parameter for NIR log normalization.
    :raises AssertionError: If bm25_ceiling is not strictly greater than cosine_ceiling.
    :raises ZeroDivisionError: If cosine_ceiling is 0.
    """
    # Raised explicitly (not via ``assert``) so the check still runs under
    # ``python -O``; the exception type and message are unchanged.
    if not (bm25_ceiling > cosine_ceiling):
        raise AssertionError("BM25 Ceiling cannot be lower than the cosine ceiling.")

    return bm25_ceiling ** (1 / float(cosine_ceiling))

# class Scaler:
#     def __init__(self, gold_standard, qwt, cwt):
#         self.scores = []
#
#     def get_norm_weight_by_query(self, qid, estimate_ceiling=False):
#         return self.get_norm_weight(self.qwt, self.cwt, bm25_ceiling=self.scores[int(qid) - 1],
#                                     estimate_ceiling=estimate_ceiling)
#
#     @classmethod
#     def get_norm_weight(cls, qwt, cwt, bm25_ceiling=100, estimate_ceiling=False):
#         qw_len = len(qwt.get_all_weights())
#         qw_non_zero = len(list(filter(lambda k: k > 0, qwt.get_all_weights())))
#
#         if estimate_ceiling:
#             bm25_ceiling = qw_non_zero / qw_len * bm25_ceiling
#
#         cosine_ceiling = len(list(filter(lambda k: k > 0, cwt.get_all_weights())))
#
#         # Analytical solution for getting log base:
#         # n_score - log(bm25_score)/log(x) = 0
#         # Solve for x
#         return bm25_ceiling ** (1 / float(cosine_ceiling))
#
def
unpack_elasticsearch_scores(results) -> Dict:
def unpack_elasticsearch_scores(results) -> Dict:
    """
    Helper function to retrieve the top score of documents for each topic.
    Used in NIR weight adjustment calculation.

    :param results: Raw input of results from Elasticsearch library. Each entry is
        either ``None`` (skipped) or a ``(topic_num, response)`` pair whose response
        exposes ``response["hits"]["hits"]``; the whole sequence may be wrapped in
        one extra outer list.
    :return:
        Returns a 1-D dictionary of {topic_num: top_score} pairs. Topics without
        any scored hit are left out.
    """
    scores = {}
    if not results:
        return scores

    # The batch may arrive wrapped in one extra outer list; unwrap it.
    first = results[0]
    if first and isinstance(first[0], list):
        results = first

    for raw_result in results:
        if raw_result is None:
            continue

        topic_num, result = raw_result
        # Take the maximum explicitly rather than relying on hit ordering, and
        # ignore hits that carry no score (``_score`` of None).
        hit_scores = [
            float(hit["_score"])
            for hit in result["hits"]["hits"]
            if hit["_score"] is not None
        ]
        if hit_scores:
            scores[topic_num] = max(hit_scores)

    return scores
Helper function to retrieve the top score of documents for each topic. Used in NIR weight adjustment calculation.
Parameters
- results: Raw input of results from Elasticsearch library
Returns
A 1-D dictionary of {topic_num: top_score} pairs.
def
get_z_value(cosine_ceiling, bm25_ceiling) -> float:
def get_z_value(cosine_ceiling, bm25_ceiling) -> float:
    """
    Analytical solution for the normalization constant, z, used in NIR log normalization.

    z is the log base for which ``log_z(bm25_ceiling) == cosine_ceiling``, i.e.
    ``z = bm25_ceiling ** (1 / cosine_ceiling)``.

    :param cosine_ceiling: The highest theoretical additive cosine score
    :param bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
    :return:
        The normalization parameter for NIR log normalization.
    :raises AssertionError: If bm25_ceiling is not strictly greater than cosine_ceiling.
    :raises ZeroDivisionError: If cosine_ceiling is 0.
    """
    # Raised explicitly (not via ``assert``) so the check still runs under
    # ``python -O``; the exception type and message are unchanged.
    if not (bm25_ceiling > cosine_ceiling):
        raise AssertionError("BM25 Ceiling cannot be lower than the cosine ceiling.")

    return bm25_ceiling ** (1 / float(cosine_ceiling))
Analytical solution for the normalization constant, z, used in NIR log normalization.
Parameters
- cosine_ceiling: The highest theoretical additive cosine score
- bm25_ceiling: The highest BM25 score retrieved from a given topic OR an estimate.
Returns
The normalization parameter for NIR log normalization.