debeir.datasets.trec_covid

 1from typing import Dict
 2
 3from debeir.core.parser import XMLParser
 4from debeir.core.query import GenericElasticsearchQuery
 5
 6
 7class TrecCovidParser(XMLParser):
 8    parse_fields = ["query", "question", "narrative"]
 9    topic_field_name = "topic"
10    id_field = "number"
11
12    @classmethod
13    def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
14        return super().get_topics(xmlfile)
15
16
class TrecElasticsearchQuery(GenericElasticsearchQuery):
    """``GenericElasticsearchQuery`` preset with TREC-COVID field names."""

    def __init__(self, topics, config, *args, **kwargs):
        """Initialise the parent query, then set the dataset-specific attributes.

        Args:
            topics: Stored on ``self.topics`` and forwarded to the parent.
            config: Stored on ``self.config`` and forwarded to the parent;
                must expose a ``query_type`` attribute.
            *args: Forwarded unchanged to the parent initialiser.
            **kwargs: Forwarded unchanged to the parent initialiser.
        """
        super().__init__(topics, config, *args, **kwargs)

        # Plain-text field names (presumably index fields -- confirm).
        text_fields = ["title", "abstract", "fulltext"]
        self.mappings = text_fields

        # Re-assigned after the parent initialiser has run, exactly as before.
        self.topics = topics
        self.config = config
        self.query_type = self.config.query_type

        # Embedding counterparts of the text fields above.
        embedding_fields = [
            "title_embedding",
            "abstract_embedding",
            "fulltext_embedding",
        ]
        self.embed_mappings = embedding_fields

        self.id_mapping = "id"

        # Dispatch table from query-type name to the inherited bound method
        # that builds that kind of query.
        dispatch = {
            "query": self.generate_query,
            "embedding": self.generate_query_embedding,
        }
        self.query_funcs = dispatch
class TrecCovidParser(debeir.core.parser.XMLParser):
 8class TrecCovidParser(XMLParser):
 9    parse_fields = ["query", "question", "narrative"]
10    topic_field_name = "topic"
11    id_field = "number"
12
13    @classmethod
14    def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
15        return super().get_topics(xmlfile)

Loads topics from an XML file.

@classmethod
def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
13    @classmethod
14    def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
15        return super().get_topics(xmlfile)

Instance method for getting topics; it forwards the instance's own parameters to the `_get_topics` class method.

class TrecElasticsearchQuery(debeir.core.query.GenericElasticsearchQuery):
class TrecElasticsearchQuery(GenericElasticsearchQuery):
    """``GenericElasticsearchQuery`` preset with TREC-COVID field names."""

    def __init__(self, topics, config, *args, **kwargs):
        """Initialise the parent query, then set the dataset-specific attributes.

        Args:
            topics: Stored on ``self.topics`` and forwarded to the parent.
            config: Stored on ``self.config`` and forwarded to the parent;
                must expose a ``query_type`` attribute.
            *args: Forwarded unchanged to the parent initialiser.
            **kwargs: Forwarded unchanged to the parent initialiser.
        """
        super().__init__(topics, config, *args, **kwargs)

        # Plain-text field names (presumably index fields -- confirm).
        text_fields = ["title", "abstract", "fulltext"]
        self.mappings = text_fields

        # Re-assigned after the parent initialiser has run, exactly as before.
        self.topics = topics
        self.config = config
        self.query_type = self.config.query_type

        # Embedding counterparts of the text fields above.
        embedding_fields = [
            "title_embedding",
            "abstract_embedding",
            "fulltext_embedding",
        ]
        self.embed_mappings = embedding_fields

        self.id_mapping = "id"

        # Dispatch table from query-type name to the inherited bound method
        # that builds that kind of query.
        dispatch = {
            "query": self.generate_query,
            "embedding": self.generate_query_embedding,
        }
        self.query_funcs = dispatch

A generic Elasticsearch query. Contains methods for NIR-style (embedding) queries and normal BM25 queries. Requires topics and a config to be supplied.

TrecElasticsearchQuery(topics, config, *args, **kwargs)
19    def __init__(self, topics, config, *args, **kwargs):
20        super().__init__(topics, config, *args, **kwargs)
21
22        self.mappings = ["title", "abstract", "fulltext"]
23
24        self.topics = topics
25        self.config = config
26        self.query_type = self.config.query_type
27
28        self.embed_mappings = [
29            "title_embedding",
30            "abstract_embedding",
31            "fulltext_embedding",
32        ]
33
34        self.id_mapping = "id"
35
36        self.query_funcs = {
37            "query": self.generate_query,
38            "embedding": self.generate_query_embedding,
39        }