debeir.datasets.trec_covid
from typing import Dict

from debeir.core.parser import XMLParser
from debeir.core.query import GenericElasticsearchQuery


class TrecCovidParser(XMLParser):
    """Topic parser configured for the TREC-COVID topics file.

    Only class-level configuration is declared here; the parsing logic
    itself is inherited from ``XMLParser``.
    """

    # Configuration read by the parent parser (presumably the per-topic
    # fields to extract, the topic element name and the id attribute --
    # confirm against XMLParser).
    parse_fields = ["query", "question", "narrative"]
    topic_field_name = "topic"
    id_field = "number"

    @classmethod
    def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
        """Return whatever the parent ``get_topics`` loads from ``xmlfile``.

        :param xmlfile: Passed unchanged to the parent implementation.
        :return: The parent's result, annotated as a mapping of topic id
            to a mapping of field name to text.
        """
        topics = super().get_topics(xmlfile)
        return topics


class TrecElasticsearchQuery(GenericElasticsearchQuery):
    """Elasticsearch query object carrying the TREC-COVID field settings."""

    def __init__(self, topics, config, *args, **kwargs):
        """Initialise the parent query, then install the dataset settings.

        :param topics: Forwarded to the parent and stored on ``self.topics``.
        :param config: Forwarded to the parent and stored on ``self.config``;
            must expose a ``query_type`` attribute.
        :param args: Extra positional arguments passed through to the parent.
        :param kwargs: Extra keyword arguments passed through to the parent.
        """
        super().__init__(topics, config, *args, **kwargs)

        # Text fields (presumably document fields in the index).
        self.mappings = ["title", "abstract", "fulltext"]

        self.topics, self.config = topics, config
        self.query_type = self.config.query_type

        # One embedding field name per entry in ``self.mappings``.
        self.embed_mappings = ["title_embedding", "abstract_embedding", "fulltext_embedding"]

        self.id_mapping = "id"

        # Dispatch table from query-type name to the method that builds it.
        self.query_funcs = {"query": self.generate_query, "embedding": self.generate_query_embedding}
class TrecCovidParser(XMLParser):
    """Topic parser configured for the TREC-COVID topics file.

    Declares class-level configuration only; parsing is inherited from
    ``XMLParser``.
    """

    # Configuration read by the parent parser (presumably the per-topic
    # fields to extract, the topic element name and the id attribute --
    # confirm against XMLParser).
    parse_fields = ["query", "question", "narrative"]
    topic_field_name = "topic"
    id_field = "number"

    @classmethod
    def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
        """Return whatever the parent ``get_topics`` loads from ``xmlfile``.

        :param xmlfile: Passed unchanged to the parent implementation.
        :return: The parent's result, annotated as a mapping of topic id
            to a mapping of field name to text.
        """
        loaded = super().get_topics(xmlfile)
        return loaded
Load topics from an XML file
@classmethod
def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
@classmethod
def get_topics(cls, xmlfile) -> Dict[int, Dict[str, str]]:
    """Delegate topic loading to the parent class.

    :param xmlfile: Passed unchanged to the parent ``get_topics``.
    :return: The parent's result, annotated as a mapping of topic id to
        a mapping of field name to text.
    """
    parsed = super().get_topics(xmlfile)
    return parsed
Instance method for getting topics; it forwards the instance's own parameters to the _get_topics class method.
Inherited Members
class TrecElasticsearchQuery(GenericElasticsearchQuery):
    """Elasticsearch query object carrying the TREC-COVID field settings."""

    def __init__(self, topics, config, *args, **kwargs):
        """Initialise the parent query, then install the dataset settings.

        :param topics: Forwarded to the parent and stored on ``self.topics``.
        :param config: Forwarded to the parent and stored on ``self.config``;
            must expose a ``query_type`` attribute.
        :param args: Extra positional arguments passed through to the parent.
        :param kwargs: Extra keyword arguments passed through to the parent.
        """
        super().__init__(topics, config, *args, **kwargs)

        # Text fields (presumably document fields in the index).
        self.mappings = ["title", "abstract", "fulltext"]

        self.topics, self.config = topics, config
        self.query_type = self.config.query_type

        # One embedding field name per entry in ``self.mappings``.
        self.embed_mappings = ["title_embedding", "abstract_embedding", "fulltext_embedding"]

        self.id_mapping = "id"

        # Dispatch table from query-type name to the method that builds it.
        self.query_funcs = {"query": self.generate_query, "embedding": self.generate_query_embedding}
A generic Elasticsearch query. Contains methods for NIR-style (embedding) queries and normal BM25 queries. Requires topics and a config to be supplied.
TrecElasticsearchQuery(topics, config, *args, **kwargs)
def __init__(self, topics, config, *args, **kwargs):
    """Initialise the parent query, then install the dataset settings.

    :param topics: Forwarded to the parent and stored on ``self.topics``.
    :param config: Forwarded to the parent and stored on ``self.config``;
        must expose a ``query_type`` attribute.
    :param args: Extra positional arguments passed through to the parent.
    :param kwargs: Extra keyword arguments passed through to the parent.
    """
    super().__init__(topics, config, *args, **kwargs)

    # Text fields (presumably document fields in the index).
    self.mappings = ["title", "abstract", "fulltext"]

    self.topics, self.config = topics, config
    self.query_type = self.config.query_type

    # One embedding field name per entry in ``self.mappings``.
    self.embed_mappings = ["title_embedding", "abstract_embedding", "fulltext_embedding"]

    self.id_mapping = "id"

    # Dispatch table from query-type name to the method that builds it.
    self.query_funcs = {"query": self.generate_query, "embedding": self.generate_query_embedding}