debeir.datasets.factory
"""
Factories that map the names used in config files to the query, parser,
executor, evaluator and config implementations registered in this module.
"""
from functools import wraps
from pathlib import Path
from typing import Dict, Type, Union

import toml
from debeir.datasets.bioreddit import BioRedditCommentParser, BioRedditSubmissionParser
from debeir.datasets.clinical_trials import ClinicalTrialParser, ClinicalTrialsElasticsearchExecutor, \
    TrialsElasticsearchQuery, TrialsQueryConfig
from debeir.datasets.marco import MarcoElasticsearchExecutor, MarcoQueryConfig
from debeir.datasets.trec_clinical_trials import TrecClincialElasticsearchQuery, TrecClinicalTrialsParser
from debeir.datasets.trec_covid import TrecCovidParser, TrecElasticsearchQuery
from debeir.evaluation.evaluator import Evaluator
from debeir.evaluation.residual_scoring import ResidualEvaluator
from debeir.core.config import Config, ElasticsearchConfig, GenericConfig, MetricsConfig, NIRConfig, SolrConfig, \
    _NIRMasterConfig
from debeir.core.executor import GenericElasticsearchExecutor
from debeir.core.parser import (
    CSVParser, Parser, TSVParser,
)
from debeir.core.query import GenericElasticsearchQuery, Query

# Value of "config_fn" in a config file -> config class
str_to_config_cls = {
    "clinical_trials": TrialsQueryConfig,
    "test_trials": TrialsQueryConfig,
    "med-marco": MarcoQueryConfig,
    "generic": MarcoQueryConfig,
}

# Value of "query_fn" in a config file -> query class
query_factory = {
    "clinical_trials": TrialsElasticsearchQuery,
    "test_trials": TrialsElasticsearchQuery,
    "generic": GenericElasticsearchQuery,
    "trec_covid": TrecElasticsearchQuery,
    "trec_clinical": TrecClincialElasticsearchQuery,
}

# Value of "parser_fn" in a config file -> parser class
parser_factory = {
    "trec_covid": TrecCovidParser,
    "bioreddit-comment": BioRedditCommentParser,
    "bioreddit-submission": BioRedditSubmissionParser,
    "test_trials": ClinicalTrialParser,
    "med-marco": CSVParser,
    "tsv": TSVParser,
    "trec_clinical": TrecClinicalTrialsParser
}

# Value of "executor_fn" in a config file -> executor class
# NOTE(review): the key here is "clinical" while the other registries use
# "clinical_trials" -- confirm this is intended
executor_factory = {
    "clinical": ClinicalTrialsElasticsearchExecutor,
    "med-marco": MarcoElasticsearchExecutor,
    "generic": GenericElasticsearchExecutor,
}

# Evaluator name -> evaluator class (not referenced by the functions in this module)
evaluator_factory = {
    "residual": ResidualEvaluator,
    "trec": Evaluator,
}


def get_index_name(config_fp):
    """
    Get the index name from the config without parsing it as TOML

    Scans the file line by line and returns the value of the first
    ``index = ...`` assignment. Only a key that is exactly ``index`` matches,
    so keys that merely share the prefix (``index_name``, ``indexes``) are
    skipped. The line is split on the first ``=`` so that a value containing
    ``=`` is returned whole.

    :param config_fp: Path of the config file to scan
    :return:
        The index name without its surrounding quotes, or None when the file
        has no ``index`` assignment
    """
    with open(config_fp, "r", encoding="utf-8") as reader:
        for line in reader:
            key, sep, value = line.partition("=")
            if not sep or key.strip() != "index":
                continue

            value = value.strip()
            # Quoted value: return the text between the opening quote and its match,
            # which also drops any trailing inline comment
            if value[:1] in ('"', "'"):
                closing = value.find(value[0], 1)
                if closing != -1:
                    return value[1:closing]

            # Unquoted (or unterminated) value: fall back to dropping double quotes
            return value.replace('"', "")

    return None


def factory_fn(config_fp, index=None) -> (Type[Query], GenericConfig,
                                          Type[Parser], Type[GenericElasticsearchExecutor]):
    """
    Factory method for looking up the query, parser and executor named by a config file

    The config is loaded with config_factory and its query_fn, parser_fn and
    executor_fn values are used as keys into this module's registries. The
    registered objects are returned as they are; nothing is instantiated here.

    :param config_fp: Config file path
    :param index: Index to search. Accepted but not used by this function;
        only the index of the loaded config is checked.
    :return:
        Query, Config, Parser, Executor (four values; no Evaluator is returned)
    :raises AssertionError: If the loaded config has no index
    :raises KeyError: If query_fn, parser_fn or executor_fn is not registered
    """
    config = config_factory(config_fp)

    # Raised explicitly instead of asserted so the check survives `python -O`
    if config.index is None:
        raise AssertionError(f"Config {config_fp!r} does not define an index")

    query_cls = query_factory[config.query_fn]
    parser = parser_factory[config.parser_fn]
    executor = executor_factory[config.executor_fn]

    return query_cls, config, parser, executor


def config_factory(path: Union[str, Path] = None, config_cls: Type[Config] = None, args_dict: Dict = None):
    """
    Factory method for creating configs

    :param path: Config path. When given, the file is loaded as TOML and its
        contents replace any args_dict that was passed.
    :param config_cls: Config class to instantiate. When omitted, the class is
        looked up in str_to_config_cls using the "config_fn" entry of the arguments.
    :param args_dict: Arguments to consider
    :return:
        A config object
    :raises TypeError: If neither path nor args_dict is given
    :raises NotImplementedError: If config_cls is omitted and the arguments have no "config_fn" entry
    :raises KeyError: If the "config_fn" entry names a config that is not registered
    """
    if path:
        args_dict = toml.load(path)

    if args_dict is None:
        raise TypeError("config_factory() requires either 'path' or 'args_dict'")

    if config_cls is None:
        if "config_fn" not in args_dict:
            raise NotImplementedError(
                "No config_cls was given and the arguments have no 'config_fn' entry to select one"
            )

        config_name = args_dict["config_fn"]
        try:
            config_cls = str_to_config_cls[config_name]
        except KeyError:
            raise KeyError(
                f"Unknown config_fn {config_name!r}; expected one of {sorted(str_to_config_cls)}"
            ) from None

    return config_cls.from_args(args_dict, config_cls)


def get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
    """
    Load the master NIR config and split it into its three setting groups

    :param nir_config: Path of the master NIR config file
    :param args: Accepted but not used
    :param ignore_errors: When True, a missing search engine configuration is not an error
    :param kwargs: Only the 'engine' entry is read; "solr" and "elasticsearch" are supported
    :return:
        NIR config, search engine config (None when no supported engine was requested), metrics config
    :raises RuntimeError: If no search engine config was built and ignore_errors is False
    """
    engine_config_classes = {"solr": SolrConfig,
                             "elasticsearch": ElasticsearchConfig}

    master = config_factory(nir_config, config_cls=_NIRMasterConfig)

    engine_config = None
    if 'engine' in kwargs:
        engine_name = kwargs['engine']
        if engine_name in engine_config_classes:
            engine_settings = master.get_search_engine_settings(engine_name)
            engine_config = config_factory(args_dict=engine_settings,
                                           config_cls=engine_config_classes[engine_name])

    if not ignore_errors and engine_config is None:
        raise RuntimeError("Unable to get a search engine configuration.")

    metrics = config_factory(args_dict=master.get_metrics(), config_cls=MetricsConfig)
    nir = config_factory(args_dict=master.get_nir_settings(), config_cls=NIRConfig)

    return nir, engine_config, metrics


def apply_nir_config(func):
    """
    Decorator that applies the NIR config settings to the current function

    The keyword arguments of each call are passed through the ``__update__``
    method of the metrics, search engine and NIR configs, in that order, and
    the result is used as the keyword arguments of the wrapped function.
    Positional arguments are forwarded unchanged.

    :param func: Function to wrap
    :return: The wrapping function
    """

    @wraps(func)
    def parse_nir_config(*args, ignore_errors=False, **kwargs):
        """
        Parses the NIR config for the different setting groups: Search Engine, Metrics and NIR settings
        Applies these settings to the current function

        :param ignore_errors: Forwarded to get_nir_config; when True a missing
            search engine config is tolerated and simply not applied
        :param args: Positional arguments, forwarded to get_nir_config and to func
        :param kwargs: Keyword arguments, forwarded to get_nir_config and updated from the configs
        :return: Whatever func returns
        """
        # ignore_errors must go by keyword: passed positionally it is swallowed
        # by get_nir_config's *args and the flag never takes effect
        nir_config, search_engine_config, metrics_config = get_nir_config(*args,
                                                                          ignore_errors=ignore_errors,
                                                                          **kwargs)

        kwargs = metrics_config.__update__(**kwargs)
        # None is possible here when ignore_errors is True
        if search_engine_config is not None:
            kwargs = search_engine_config.__update__(**kwargs)
        kwargs = nir_config.__update__(**kwargs)

        return func(*args, **kwargs)

    return parse_nir_config
def
get_index_name(config_fp):
def get_index_name(config_fp):
    """
    Get the index name from the config without parsing it as TOML

    Scans the file line by line and returns the value of the first
    ``index = ...`` assignment. Only a key that is exactly ``index`` matches,
    so keys that merely share the prefix (``index_name``, ``indexes``) are
    skipped. The line is split on the first ``=`` so that a value containing
    ``=`` is returned whole.

    :param config_fp: Path of the config file to scan
    :return:
        The index name without its surrounding quotes, or None when the file
        has no ``index`` assignment
    """
    with open(config_fp, "r", encoding="utf-8") as reader:
        for line in reader:
            key, sep, value = line.partition("=")
            if not sep or key.strip() != "index":
                continue

            value = value.strip()
            # Quoted value: return the text between the opening quote and its match,
            # which also drops any trailing inline comment
            if value[:1] in ('"', "'"):
                closing = value.find(value[0], 1)
                if closing != -1:
                    return value[1:closing]

            # Unquoted (or unterminated) value: fall back to dropping double quotes
            return value.replace('"', "")

    return None
Get the index name from the config without parsing as a TOML
Parameters
- config_fp:
Returns
def
factory_fn( config_fp, index=None) -> (<class 'debeir.core.query.Query'>, <class 'debeir.core.config.GenericConfig'>, <class 'debeir.core.parser.Parser'>, <class 'debeir.core.executor.GenericElasticsearchExecutor'>, <class 'debeir.evaluation.evaluator.Evaluator'>):
def factory_fn(config_fp, index=None) -> (Type[Query], GenericConfig,
                                          Type[Parser], Type[GenericElasticsearchExecutor]):
    """
    Factory method for looking up the query, parser and executor named by a config file

    The config is loaded with config_factory and its query_fn, parser_fn and
    executor_fn values are used as keys into this module's registries. The
    registered objects are returned as they are; nothing is instantiated here.

    :param config_fp: Config file path
    :param index: Index to search. Accepted but not used by this function;
        only the index of the loaded config is checked.
    :return:
        Query, Config, Parser, Executor (four values; no Evaluator is returned)
    :raises AssertionError: If the loaded config has no index
    :raises KeyError: If query_fn, parser_fn or executor_fn is not registered
    """
    config = config_factory(config_fp)

    # Raised explicitly instead of asserted so the check survives `python -O`
    if config.index is None:
        raise AssertionError(f"Config {config_fp!r} does not define an index")

    query_cls = query_factory[config.query_fn]
    parser = parser_factory[config.parser_fn]
    executor = executor_factory[config.executor_fn]

    return query_cls, config, parser, executor
Factory method for creating the parsed topics, config object, query object and query executor object
Parameters
- config_fp: Config file path
- index: Index to search
Returns
Query, Config, Parser, Executor
def
config_factory( path: Union[str, pathlib.Path] = None, config_cls: Type[debeir.core.config.Config] = None, args_dict: Dict = None):
def config_factory(path: Union[str, Path] = None, config_cls: Type["Config"] = None, args_dict: Dict = None):
    """
    Factory method for creating configs

    :param path: Config path. When given, the file is loaded as TOML and its
        contents replace any args_dict that was passed.
    :param config_cls: Config class to instantiate. When omitted, the class is
        looked up in str_to_config_cls using the "config_fn" entry of the arguments.
    :param args_dict: Arguments to consider
    :return:
        A config object
    :raises TypeError: If neither path nor args_dict is given
    :raises NotImplementedError: If config_cls is omitted and the arguments have no "config_fn" entry
    :raises KeyError: If the "config_fn" entry names a config that is not registered
    """
    if path:
        args_dict = toml.load(path)

    # Fail with a clear message instead of "argument of type 'NoneType' is not iterable"
    if args_dict is None:
        raise TypeError("config_factory() requires either 'path' or 'args_dict'")

    if config_cls is None:
        if "config_fn" not in args_dict:
            raise NotImplementedError(
                "No config_cls was given and the arguments have no 'config_fn' entry to select one"
            )

        config_name = args_dict["config_fn"]
        try:
            config_cls = str_to_config_cls[config_name]
        except KeyError:
            raise KeyError(
                f"Unknown config_fn {config_name!r}; expected one of {sorted(str_to_config_cls)}"
            ) from None

    return config_cls.from_args(args_dict, config_cls)
Factory method for creating configs
Parameters
- path: Config path
- config_cls: Config class to instantiate
- args_dict: Arguments to consider
Returns
A config object
def
get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
def get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
    """
    Load the master NIR config and split it into its three setting groups

    :param nir_config: Path of the master NIR config file
    :param args: Accepted but not used
    :param ignore_errors: When True, a missing search engine configuration is not an error
    :param kwargs: Only the 'engine' entry is read; "solr" and "elasticsearch" are supported
    :return:
        NIR config, search engine config (None when no supported engine was requested), metrics config
    :raises RuntimeError: If no search engine config was built and ignore_errors is False
    """
    engine_config_classes = {"solr": SolrConfig,
                             "elasticsearch": ElasticsearchConfig}

    master = config_factory(nir_config, config_cls=_NIRMasterConfig)

    engine_config = None
    if 'engine' in kwargs:
        engine_name = kwargs['engine']
        if engine_name in engine_config_classes:
            engine_settings = master.get_search_engine_settings(engine_name)
            engine_config = config_factory(args_dict=engine_settings,
                                           config_cls=engine_config_classes[engine_name])

    if not ignore_errors and engine_config is None:
        raise RuntimeError("Unable to get a search engine configuration.")

    metrics = config_factory(args_dict=master.get_metrics(), config_cls=MetricsConfig)
    nir = config_factory(args_dict=master.get_nir_settings(), config_cls=NIRConfig)

    return nir, engine_config, metrics
def
apply_nir_config(func):
def apply_nir_config(func):
    """
    Decorator that applies the NIR config settings to the current function

    The keyword arguments of each call are passed through the ``__update__``
    method of the metrics, search engine and NIR configs, in that order, and
    the result is used as the keyword arguments of the wrapped function.
    Positional arguments are forwarded unchanged.

    :param func: Function to wrap
    :return: The wrapping function
    """
    # Imported locally: functools is not among this module's top-level imports
    from functools import wraps

    @wraps(func)
    def parse_nir_config(*args, ignore_errors=False, **kwargs):
        """
        Parses the NIR config for the different setting groups: Search Engine, Metrics and NIR settings
        Applies these settings to the current function

        :param ignore_errors: Forwarded to get_nir_config; when True a missing
            search engine config is tolerated and simply not applied
        :param args: Positional arguments, forwarded to get_nir_config and to func
        :param kwargs: Keyword arguments, forwarded to get_nir_config and updated from the configs
        :return: Whatever func returns
        """
        # ignore_errors must go by keyword: passed positionally it is swallowed
        # by get_nir_config's *args and the flag never takes effect
        nir_config, search_engine_config, metrics_config = get_nir_config(*args,
                                                                          ignore_errors=ignore_errors,
                                                                          **kwargs)

        kwargs = metrics_config.__update__(**kwargs)
        # None is possible here when ignore_errors is True
        if search_engine_config is not None:
            kwargs = search_engine_config.__update__(**kwargs)
        kwargs = nir_config.__update__(**kwargs)

        return func(*args, **kwargs)

    return parse_nir_config
Decorator that applies the NIR config settings to the current function. Replaces arguments and keyword arguments with those found in the config.
Parameters
- func: