debeir.rankers.reranking.nir

NIR Reranker

[Insert paper link here]

 1"""
 2NIR Reranker
 3
 4[Insert paper link here]
 5"""
 6
 7import math
 8from typing import Dict, List
 9
10from debeir.core.document import Document
11from debeir.rankers.reranking.reranker import DocumentReRanker
12from debeir.rankers.transformer_sent_encoder import Encoder
13from debeir.utils import scaler
14from scipy import spatial
15from tqdm import tqdm
16
17
class NIReRanker(DocumentReRanker):
    """
    Re-ranker which uses the NIR scoring method
        score = log(bm25)/log(z) + cosine_sum

    ``cosine_sum`` is the weighted sum, over the encoded facets of a document, of
    ``distance_fn(query_vec, facet_vec)``. ``z`` is the logarithm base returned by
    ``scaler.get_z_value(top_cosine_score, top_score)``.
    """

    def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
                 distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
                 presort=False, fields_to_encode=None,
                 *args, **kwargs):
        """
        Set up the re-ranker and encode the query once.

        :param query: Query forwarded to the parent class; ``self.query`` is then passed to ``encoder``
            (presumably the parent stores it unchanged -- confirm against DocumentReRanker).
        :param ranked_list: Documents to re-rank. Each one is read for ``score``, ``facets`` and ``doc_id``.
        :param encoder: Callable applied to the query and to every facet value; its outputs are
            handed to ``distance_fn``.
        :param distance_fn: Two-argument function comparing the query vector with a facet vector.
        :param facets_weights: Optional mapping of facet name to weight. Facets that are not listed
            get a weight of 1.0 and facets weighted 0 are skipped.
        :param presort: When true, ``ranked_list`` is sorted in place by ascending ``score`` first.
        :param fields_to_encode: Optional collection of facet names to score. When empty or None,
            every facet of each document is considered.
        :param args: Extra positional arguments forwarded to the parent class.
        :param kwargs: Extra keyword arguments forwarded to the parent class.
        """
        if presort:
            ranked_list.sort(key=lambda doc: doc.score)

        super().__init__(query, ranked_list, *args, **kwargs)

        self.encoder = encoder
        self.distance_fn = distance_fn
        self.fields_to_encode = fields_to_encode
        self.facets_weights = facets_weights or {}

        self.top_score = self._get_top_score()
        # Running maximum of the per-document cosine sums, raised during the pre-calculation pass
        self.top_cosine_score = -1

        self.query_vec = self.encoder(self.query)

        # The cosine scores are computed lazily, on the first call to _compute_scores
        self.pre_calc = {}
        self.pre_calc_finished = False
        self.log_norm = None

    def _get_top_score(self):
        """
        Return the highest first-stage score found in ``ranked_list``.

        The maximum is taken over the whole list instead of reading element 0, so the result
        is still the top score after ``presort`` has ordered the list by ascending score.

        :raises IndexError: If ``ranked_list`` is empty.
        """
        if not self.ranked_list:
            raise IndexError("ranked_list is empty; cannot determine the top score")

        return max(document.score for document in self.ranked_list)

    def _compute_scores_helper(self):
        """
        Pre-compute the per-facet scores and their sum for every document in ``ranked_list``.

        The results are stored in ``self.pre_calc`` keyed by ``doc_id``. Each entry maps the facet
        names to their weighted scores and ``"cosine_sum"`` to their total. ``self.top_cosine_score``
        is raised to the largest total seen.
        """
        # NOTE(review): the default distance_fn is scipy's cosine *distance*, which is smaller for
        # more similar vectors -- confirm that adding it to the score is the intended behaviour.
        for document in tqdm(self.ranked_list, desc="Calculating cosine scores"):
            facet_scores = {}
            facets = self.fields_to_encode or document.facets

            for facet in facets:
                if "embedding" in facet.lower():
                    continue

                document_facet = document.facets[facet]
                # Weights are keyed by the facet name, not by the facet's content
                facet_weight = self.facets_weights.get(facet, 1.0)

                # Early exit
                if facet_weight == 0:
                    continue

                document_vec = self.encoder(document_facet)
                facet_scores[facet] = self.distance_fn(self.query_vec, document_vec) * facet_weight

            # Sum once per document, after all facets are scored. Summing inside the facet loop
            # would add the previously stored "cosine_sum" entry to the total again. Doing it here
            # also gives a document with no scored facets an entry (a sum of 0).
            cosine_sum = sum(facet_scores.values())
            facet_scores["cosine_sum"] = cosine_sum

            self.top_cosine_score = max(self.top_cosine_score, cosine_sum)
            self.pre_calc[document.doc_id] = facet_scores

        self.pre_calc_finished = True

    def _compute_scores(self, document):
        """
        Return the NIR score of ``document``.

        The first call runs the pre-calculation pass over the whole ranked list and derives the
        logarithm base ``self.log_norm`` from the top cosine sum and the top first-stage score.

        :param document: A document from ``ranked_list``; read for ``score`` and ``doc_id``.
        :return: ``log(document.score)`` in base ``self.log_norm`` plus the document's ``cosine_sum``.
        :raises KeyError: If ``document.doc_id`` was not part of the pre-calculation pass.
        """
        if not self.pre_calc_finished:
            self._compute_scores_helper()
            self.log_norm = scaler.get_z_value(self.top_cosine_score, self.top_score)

        return math.log(document.score, self.log_norm) + self.pre_calc[document.doc_id]["cosine_sum"]
class NIReRanker(DocumentReRanker):
    """
    Re-ranker which uses the NIR scoring method
        score = log(bm25)/log(z) + cosine_sum

    ``cosine_sum`` is the weighted sum, over the encoded facets of a document, of
    ``distance_fn(query_vec, facet_vec)``. ``z`` is the logarithm base returned by
    ``scaler.get_z_value(top_cosine_score, top_score)``.
    """

    def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
                 distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
                 presort=False, fields_to_encode=None,
                 *args, **kwargs):
        """
        Set up the re-ranker and encode the query once.

        :param query: Query forwarded to the parent class; ``self.query`` is then passed to ``encoder``
            (presumably the parent stores it unchanged -- confirm against DocumentReRanker).
        :param ranked_list: Documents to re-rank. Each one is read for ``score``, ``facets`` and ``doc_id``.
        :param encoder: Callable applied to the query and to every facet value; its outputs are
            handed to ``distance_fn``.
        :param distance_fn: Two-argument function comparing the query vector with a facet vector.
        :param facets_weights: Optional mapping of facet name to weight. Facets that are not listed
            get a weight of 1.0 and facets weighted 0 are skipped.
        :param presort: When true, ``ranked_list`` is sorted in place by ascending ``score`` first.
        :param fields_to_encode: Optional collection of facet names to score. When empty or None,
            every facet of each document is considered.
        :param args: Extra positional arguments forwarded to the parent class.
        :param kwargs: Extra keyword arguments forwarded to the parent class.
        """
        if presort:
            ranked_list.sort(key=lambda doc: doc.score)

        super().__init__(query, ranked_list, *args, **kwargs)

        self.encoder = encoder
        self.distance_fn = distance_fn
        self.fields_to_encode = fields_to_encode
        self.facets_weights = facets_weights or {}

        self.top_score = self._get_top_score()
        # Running maximum of the per-document cosine sums, raised during the pre-calculation pass
        self.top_cosine_score = -1

        self.query_vec = self.encoder(self.query)

        # The cosine scores are computed lazily, on the first call to _compute_scores
        self.pre_calc = {}
        self.pre_calc_finished = False
        self.log_norm = None

    def _get_top_score(self):
        """
        Return the highest first-stage score found in ``ranked_list``.

        The maximum is taken over the whole list instead of reading element 0, so the result
        is still the top score after ``presort`` has ordered the list by ascending score.

        :raises IndexError: If ``ranked_list`` is empty.
        """
        if not self.ranked_list:
            raise IndexError("ranked_list is empty; cannot determine the top score")

        return max(document.score for document in self.ranked_list)

    def _compute_scores_helper(self):
        """
        Pre-compute the per-facet scores and their sum for every document in ``ranked_list``.

        The results are stored in ``self.pre_calc`` keyed by ``doc_id``. Each entry maps the facet
        names to their weighted scores and ``"cosine_sum"`` to their total. ``self.top_cosine_score``
        is raised to the largest total seen.
        """
        # NOTE(review): the default distance_fn is scipy's cosine *distance*, which is smaller for
        # more similar vectors -- confirm that adding it to the score is the intended behaviour.
        for document in tqdm(self.ranked_list, desc="Calculating cosine scores"):
            facet_scores = {}
            facets = self.fields_to_encode or document.facets

            for facet in facets:
                if "embedding" in facet.lower():
                    continue

                document_facet = document.facets[facet]
                # Weights are keyed by the facet name, not by the facet's content
                facet_weight = self.facets_weights.get(facet, 1.0)

                # Early exit
                if facet_weight == 0:
                    continue

                document_vec = self.encoder(document_facet)
                facet_scores[facet] = self.distance_fn(self.query_vec, document_vec) * facet_weight

            # Sum once per document, after all facets are scored. Summing inside the facet loop
            # would add the previously stored "cosine_sum" entry to the total again. Doing it here
            # also gives a document with no scored facets an entry (a sum of 0).
            cosine_sum = sum(facet_scores.values())
            facet_scores["cosine_sum"] = cosine_sum

            self.top_cosine_score = max(self.top_cosine_score, cosine_sum)
            self.pre_calc[document.doc_id] = facet_scores

        self.pre_calc_finished = True

    def _compute_scores(self, document):
        """
        Return the NIR score of ``document``.

        The first call runs the pre-calculation pass over the whole ranked list and derives the
        logarithm base ``self.log_norm`` from the top cosine sum and the top first-stage score.

        :param document: A document from ``ranked_list``; read for ``score`` and ``doc_id``.
        :return: ``log(document.score)`` in base ``self.log_norm`` plus the document's ``cosine_sum``.
        :raises KeyError: If ``document.doc_id`` was not part of the pre-calculation pass.
        """
        if not self.pre_calc_finished:
            self._compute_scores_helper()
            self.log_norm = scaler.get_z_value(self.top_cosine_score, self.top_score)

        return math.log(document.score, self.log_norm) + self.pre_calc[document.doc_id]["cosine_sum"]

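Re-ranker which uses the NIR scoring method: score = log(bm25)/log(z) + cosine_sum, where z is the logarithm base returned by scaler.get_z_value(top_cosine_score, top_score) and cosine_sum is the weighted sum of the per-facet distance scores of a document.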

NIReRanker( query, ranked_list: List[debeir.core.document.Document], encoder: debeir.rankers.transformer_sent_encoder.Encoder, distance_fn=<function cosine>, facets_weights: Dict = None, presort=False, fields_to_encode=None, *args, **kwargs)
25    def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
26                 distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
27                 presort=False, fields_to_encode=None,
28                 *args, **kwargs):
29
30        if presort:
31            ranked_list.sort(key=lambda k: k.score)
32
33        super().__init__(query, ranked_list, *args, **kwargs)
34        self.encoder = encoder
35        self.top_score = self._get_top_score()
36        self.top_cosine_score = -1
37
38        self.query_vec = self.encoder(self.query)
39        self.distance_fn = distance_fn
40        self.fields_to_encode = fields_to_encode
41
42        if facets_weights:
43            self.facets_weights = facets_weights
44        else:
45            self.facets_weights = {}
46
47        # Compute all the cosine scores
48        self.pre_calc = {}
49        self.pre_calc_finished = False
50        self.log_norm = None