debeir.core.config
import abc
import dataclasses
import functools
import os
from abc import ABC
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, MutableMapping, Union

import loguru
import toml


class Config:
    """
    Config Interface with creation class methods.

    Concrete configs are dataclasses that inherit from this class and
    implement :meth:`validate`. The ``from_*`` constructors build an instance
    of a given dataclass and call its ``validate`` method before returning it.
    """

    def __update__(self, **kwargs):
        """
        Overlay this config's attributes onto the supplied keyword arguments.

        Values stored on the config take precedence over the passed values.

        :param kwargs: Keyword arguments to be updated
        :return:
            The keyword arguments updated with the instance attributes.
        """
        kwargs.update(vars(self))

        return kwargs

    @classmethod
    def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
        """
        Instantiates a Config object from a toml file

        :param fp: File path of the Config TOML file
        :param field_class: Class of the Config object to be instantiated
        :param args: Arguments forwarded to ``from_args``
        :param kwargs: Keyword arguments forwarded to ``from_args``
        :return:
            An instantiated and validated Config object.
        """
        args_dict = toml.load(fp)

        return cls.from_args(args_dict, field_class, *args, **kwargs)

    @classmethod
    def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
        """
        Instantiates a Config object from arguments

        :param args_dict: Mapping of field names to values. Keys that are not
            dataclass fields of ``field_class`` are ignored.
        :param field_class: Dataclass to instantiate
        :param args: Unused, accepted for interface compatibility
        :param kwargs: Unused, accepted for interface compatibility
        :return:
            An instantiated ``field_class`` object on which ``validate`` has been called.
        """
        field_names = {f.name for f in dataclasses.fields(field_class)}
        obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})

        if getattr(obj, 'encoder_fp', None):
            # Imported lazily: the encoder module is only needed when an encoder path is set
            from debeir.rankers.transformer_sent_encoder import Encoder

            obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)

        obj.validate()

        return obj

    @classmethod
    def from_dict(cls, data_class, **kwargs):
        """
        Instantiates a Config object from a dictionary

        :param data_class: Dataclass to instantiate
        :param kwargs: Field values. Keys that are not dataclass fields of
            ``data_class`` are ignored. A truthy ``encoder_fp`` is used to build
            an Encoder that is supplied as the ``encoder`` value.
        :return:
            An instantiated ``data_class`` object on which ``validate`` has been called.
        """
        if kwargs.get("encoder_fp"):
            # Imported lazily: the encoder module is only needed when an encoder path is set
            from debeir.rankers.transformer_sent_encoder import Encoder

            # NOTE(review): from_args also passes encoder_normalize to Encoder; confirm
            # whether it should be forwarded here as well.
            kwargs["encoder"] = Encoder(kwargs["encoder_fp"])

        field_names = {f.name for f in dataclasses.fields(data_class)}
        obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
        # validate() takes no arguments; passing one raised TypeError
        obj.validate()

        return obj

    @abc.abstractmethod
    def validate(self):
        """
        Validates if the config is correct.
        Must be implemented by inherited classes.
        """
        pass


@dataclass(init=True, unsafe_hash=True)
class GenericConfig(Config, ABC):
    """
    Generic NIR Configuration file from which all configs will inherit.

    Only ``query_type`` is required; every other field has a default.
    """
    query_type: str
    index: str = None
    encoder_normalize: bool = True
    ablations: bool = False
    norm_weight: float = None
    automatic: bool = None
    encoder: object = None
    encoder_fp: str = None
    query_weights: List[float] = None
    cosine_weights: List[float] = None
    evaluate: bool = False
    qrels: str = None
    config_fn: str = None
    query_fn: str = None
    parser_fn: str = None
    executor_fn: str = None
    cosine_ceiling: float = None
    topics_path: str = None
    return_id_only: bool = False
    overwrite_output_if_exists: bool = False
    output_file: str = None
    run_name: str = None

    @classmethod
    def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
        """
        Instantiates this config class from a toml file

        :param fp: File path of the Config TOML file
        :param args: Arguments forwarded to ``Config.from_toml``
        :param kwargs: Keyword arguments forwarded to ``Config.from_toml``
        :return:
            An instantiated and validated object of the class this is called on.
        """
        return Config.from_toml(fp, cls, *args, **kwargs)


@dataclass(init=True)
class _NIRMasterConfig(Config):
    """
    Base NIR Master config: nir.toml
    """
    metrics: Dict
    search: Dict
    nir: Dict

    def get_metrics(self, key='common', return_as_instance=False):
        """
        Looks up a metrics section.

        :param key: Key into the ``metrics`` mapping
        :param return_as_instance: Return a MetricsConfig instead of the raw value
        :return:
            ``self.metrics[key]``, or a MetricsConfig built from it.
        """
        metrics = self.metrics[key]
        if return_as_instance:
            return MetricsConfig.from_args(metrics, MetricsConfig)

        return metrics

    def get_search_engine_settings(self, key='elasticsearch', return_as_instance=False):
        """
        Looks up the settings of a search engine.

        :param key: Key into ``self.search['engines']``
        :param return_as_instance: Return an ElasticsearchConfig instead of the raw value
        :return:
            ``self.search['engines'][key]``, or an ElasticsearchConfig built from it.
        """
        engine_settings = self.search['engines'][key]
        if return_as_instance:
            return ElasticsearchConfig.from_args(engine_settings, ElasticsearchConfig)

        return engine_settings

    def get_nir_settings(self, key='default_settings', return_as_instance=False):
        """
        Looks up a NIR settings section.

        :param key: Key into the ``nir`` mapping
        :param return_as_instance: Return a NIRConfig instead of the raw value
        :return:
            ``self.nir[key]``, or a NIRConfig built from it.
        """
        nir_settings = self.nir[key]

        if return_as_instance:
            return NIRConfig.from_args(nir_settings, NIRConfig)

        return nir_settings

    def validate(self):
        """
        No checks are performed; always returns True.
        """
        return True


@dataclass(init=True)
class ElasticsearchConfig(Config):
    """
    Basic Elasticsearch configuration file settings from the master nir.toml file
    """
    protocol: str
    ip: str
    port: str
    timeout: int

    def validate(self):
        """
        Checks if Elasticsearch URL is correct

        :raises AssertionError: If the protocol is not http/https or the port is not numeric.
        """
        assert self.protocol in ['http', 'https'], \
            f"Unsupported protocol: {self.protocol!r}"
        # The port may arrive as an int (e.g. an unquoted TOML value); normalise to
        # str so the digit check does not fail with AttributeError
        assert str(self.port).isdigit(), f"Port must be numeric, got {self.port!r}"


@dataclass(init=True)
class SolrConfig(ElasticsearchConfig):
    """
    Basic Solr configuration file settings from the master nir.toml file
    """
    pass


@dataclass(init=True)
class MetricsConfig(Config):
    """
    Basic Metrics configuration file settings from the master nir.toml file
    """
    metrics: List[str]

    def validate(self):
        """
        Checks if each metric is usable by evaluator classes.

        Every entry must look like ``<alphabetic name>@<numeric depth>``.

        :raises AssertionError: If an entry does not have that form.
        """
        for entry in self.metrics:
            # partition never raises on extra "@" characters; the surplus ends up
            # in ``depth`` and is rejected by the digit check below
            name, separator, depth = entry.partition("@")

            assert separator, f"Metric {entry!r} is missing '@'"
            assert name.isalpha(), f"Metric name in {entry!r} must be alphabetic"
            assert depth.isdigit(), f"Metric depth in {entry!r} must be numeric"


@dataclass(init=True)
class NIRConfig(Config):
    """
    Basic NIR configuration file settings from the master nir.toml file
    """
    norm_weight: str
    evaluate: bool
    return_size: int
    output_directory: str

    def validate(self):
        """
        No checks are performed; always returns True.
        """
        return True


def apply_config(func):
    """
    Configuration decorator.

    :param func: Decorated function
    :return:
        Wrapper that overlays ``self.config`` onto the keyword arguments of each call.
    """

    @functools.wraps(func)
    def use_config(self, *args, **kwargs):
        """
        Replaces keywords passed to the function with ones from self.config.

        :param self: Object exposing a ``config`` attribute
        :param args: Passed through unchanged
        :param kwargs: To be updated
        :return:
            Whatever the decorated function returns.
        """
        if self.config is not None:
            kwargs = self.config.__update__(**kwargs)

        return func(self, *args, **kwargs)

    return use_config


def override_with_toml_config(func):
    """
    Configuration decorator. Overwrite a function's kwargs with a specified toml config file.
    Pass override_with_config_=path/to/config

    :param func: Decorated function
    :return:
        Wrapper accepting an extra leading ``override_with_config_`` parameter.
    """

    @functools.wraps(func)
    def override_with(override_with_config_: str = None, *args, **kwargs):
        """
        Replaces keywords passed to the function with ones loaded from a toml file.

        :param override_with_config_: Path to config else None
        :param args: Passed through unchanged
        :param kwargs: To be updated. A ``override_<function name>_with_config_``
            entry is consumed here and used as the config path.
        :return:
            Whatever the decorated function returns.
        """
        named_key = f"override_{func.__name__}_with_config_"

        if named_key in kwargs:
            # Use the value stored under the key (not the key name itself) as the path
            named_path = kwargs.pop(named_key)
            if named_path is not None:
                override_with_config_ = named_path

        if override_with_config_ is not None and os.path.exists(override_with_config_):
            # dict.update returns None, so update in place rather than rebinding kwargs
            kwargs.update(toml.load(override_with_config_))

        return func(*args, **kwargs)

    return override_with


def save_kwargs_to_file(func):
    """
    Configuration decorator. Saves the keyword arguments of a call to a toml file.

    :param func: Decorated function
    :return:
        Wrapper accepting an extra leading ``save_kwargs_to_`` parameter.
    """

    @functools.wraps(func)
    def save_kwargs(save_kwargs_to_: str = None, *args, **kwargs):
        """
        Save kwargs passed to the function, output_file = f"{save_kwargs_to_}/{func.__name__}.toml"

        :param save_kwargs_to_: Path to save location for config else None. This should be a DIRECTORY.
        :param args: Passed through unchanged
        :param kwargs: Saved to the file and passed through unchanged
        :return:
            Whatever the decorated function returns.
        """
        if save_kwargs_to_ is not None:
            # makedirs either leaves the directory in place or raises, so no
            # separate existence check is needed afterwards
            os.makedirs(save_kwargs_to_, exist_ok=True)

            output_file = f"{save_kwargs_to_}/{func.__name__}.toml"
            loguru.logger.info(f"Saving kwargs to {output_file}")

            # Context manager closes the handle even if serialisation fails
            with open(output_file, "w+") as config_file:
                toml.dump(kwargs, config_file)

        return func(*args, **kwargs)

    return save_kwargs
class Config:
    """
    Config Interface with creation class methods.

    Concrete configs are dataclasses that inherit from this class and
    implement :meth:`validate`. The ``from_*`` constructors build an instance
    of a given dataclass and call its ``validate`` method before returning it.
    """

    def __update__(self, **kwargs):
        """
        Overlay this config's attributes onto the supplied keyword arguments.

        Values stored on the config take precedence over the passed values.

        :param kwargs: Keyword arguments to be updated
        :return:
            The keyword arguments updated with the instance attributes.
        """
        kwargs.update(vars(self))

        return kwargs

    @classmethod
    def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
        """
        Instantiates a Config object from a toml file

        :param fp: File path of the Config TOML file
        :param field_class: Class of the Config object to be instantiated
        :param args: Arguments forwarded to ``from_args``
        :param kwargs: Keyword arguments forwarded to ``from_args``
        :return:
            An instantiated and validated Config object.
        """
        args_dict = toml.load(fp)

        return cls.from_args(args_dict, field_class, *args, **kwargs)

    @classmethod
    def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
        """
        Instantiates a Config object from arguments

        :param args_dict: Mapping of field names to values. Keys that are not
            dataclass fields of ``field_class`` are ignored.
        :param field_class: Dataclass to instantiate
        :param args: Unused, accepted for interface compatibility
        :param kwargs: Unused, accepted for interface compatibility
        :return:
            An instantiated ``field_class`` object on which ``validate`` has been called.
        """
        field_names = {f.name for f in dataclasses.fields(field_class)}
        obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})

        if getattr(obj, 'encoder_fp', None):
            # Imported lazily: the encoder module is only needed when an encoder path is set
            from debeir.rankers.transformer_sent_encoder import Encoder

            obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)

        obj.validate()

        return obj

    @classmethod
    def from_dict(cls, data_class, **kwargs):
        """
        Instantiates a Config object from a dictionary

        :param data_class: Dataclass to instantiate
        :param kwargs: Field values. Keys that are not dataclass fields of
            ``data_class`` are ignored. A truthy ``encoder_fp`` is used to build
            an Encoder that is supplied as the ``encoder`` value.
        :return:
            An instantiated ``data_class`` object on which ``validate`` has been called.
        """
        if kwargs.get("encoder_fp"):
            # Imported lazily: the encoder module is only needed when an encoder path is set
            from debeir.rankers.transformer_sent_encoder import Encoder

            # NOTE(review): from_args also passes encoder_normalize to Encoder; confirm
            # whether it should be forwarded here as well.
            kwargs["encoder"] = Encoder(kwargs["encoder_fp"])

        field_names = {f.name for f in dataclasses.fields(data_class)}
        obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
        # validate() takes no arguments; passing one raised TypeError
        obj.validate()

        return obj

    @abc.abstractmethod
    def validate(self):
        """
        Validates if the config is correct.
        Must be implemented by inherited classes.
        """
        pass
Config Interface with creation class methods
@classmethod
def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
    """
    Build a config object from the contents of a TOML file.

    :param fp: Location of the TOML file to read
    :param field_class: Config class that will be instantiated
    :param args: Extra positional arguments forwarded to ``from_args``
    :param kwargs: Extra keyword arguments forwarded to ``from_args``
    :return:
        The object produced by ``cls.from_args`` for the parsed file.
    """
    parsed = toml.load(fp)
    config = cls.from_args(parsed, field_class, *args, **kwargs)

    return config
Instantiates a Config object from a toml file
Parameters
- fp: File path of the Config TOML file
- field_class: Class of the Config object to be instantiated
- args: Arguments to be passed to Config
- kwargs: Keyword arguments to be passed
Returns
An instantiated and validated Config object.
@classmethod
def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
    """
    Instantiates a Config object from arguments

    :param args_dict: Mapping of field names to values. Keys that are not
        dataclass fields of ``field_class`` are ignored.
    :param field_class: Dataclass to instantiate
    :param args: Unused, accepted for interface compatibility
    :param kwargs: Unused, accepted for interface compatibility
    :return:
        An instantiated ``field_class`` object on which ``validate`` has been called.
    """
    field_names = {f.name for f in dataclasses.fields(field_class)}
    obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})

    if getattr(obj, 'encoder_fp', None):
        # Imported lazily: the encoder module is only needed when an encoder path is set,
        # so plain configs can be built without loading the ranker stack
        from debeir.rankers.transformer_sent_encoder import Encoder

        obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)

    obj.validate()

    return obj
Instantiates a Config object from arguments
Parameters
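- args_dict: Mapping of field names to values; keys that are not dataclass fields of field_class are ignored
- field_class: Dataclass type of the Config object to be instantiated
- args: Unused
- kwargs: Unused
Returns
The instantiated field_class object, after its validate() method has been called.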
@classmethod
def from_dict(cls, data_class, **kwargs):
    """
    Instantiates a Config object from a dictionary

    :param data_class: Dataclass to instantiate
    :param kwargs: Field values. Keys that are not dataclass fields of
        ``data_class`` are ignored. A truthy ``encoder_fp`` is used to build
        an Encoder that is supplied as the ``encoder`` value.
    :return:
        An instantiated ``data_class`` object on which ``validate`` has been called.
    """
    if kwargs.get("encoder_fp"):
        # Imported lazily: the encoder module is only needed when an encoder path is set
        from debeir.rankers.transformer_sent_encoder import Encoder

        # NOTE(review): from_args also passes encoder_normalize to Encoder; confirm
        # whether it should be forwarded here as well.
        kwargs["encoder"] = Encoder(kwargs["encoder_fp"])

    field_names = {f.name for f in dataclasses.fields(data_class)}
    obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
    # validate() takes no arguments; passing one raised TypeError
    obj.validate()

    return obj
Instantiates a Config object from a dictionary
Parameters
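- data_class: Dataclass type of the Config object to be instantiated
- kwargs: Field values for data_class; keys that are not dataclass fields are ignored. A truthy encoder_fp is used to construct an Encoder that is supplied as the encoder value
Returns
The instantiated data_class object.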
@dataclass(unsafe_hash=True)
class GenericConfig(Config, ABC):
    """
    Shared NIR configuration that the other configs derive from.

    ``query_type`` is the only required field; all remaining fields carry a default.
    """
    # Required
    query_type: str

    # Optional (defaulted)
    index: str = None
    encoder_normalize: bool = True
    ablations: bool = False
    norm_weight: float = None
    automatic: bool = None
    encoder: object = None
    encoder_fp: str = None
    query_weights: List[float] = None
    cosine_weights: List[float] = None
    evaluate: bool = False
    qrels: str = None
    config_fn: str = None
    query_fn: str = None
    parser_fn: str = None
    executor_fn: str = None
    cosine_ceiling: float = None
    topics_path: str = None
    return_id_only: bool = False
    overwrite_output_if_exists: bool = False
    output_file: str = None
    run_name: str = None

    @classmethod
    def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
        """
        Load a TOML file into an instance of the class this is called on.

        :param fp: File path of the Config TOML file
        :param args: Arguments forwarded to ``Config.from_toml``
        :param kwargs: Keyword arguments forwarded to ``Config.from_toml``
        :return:
            The object returned by ``Config.from_toml`` with ``cls`` as the class to build.
        """
        base_loader = Config.from_toml

        return base_loader(fp, cls, *args, **kwargs)
Generic NIR Configuration file from which all configs will inherit
@classmethod
def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
    """
    Load a TOML file into an instance of the class this is called on.

    :param fp: File path of the Config TOML file
    :param args: Arguments forwarded to ``Config.from_toml``
    :param kwargs: Keyword arguments forwarded to ``Config.from_toml``
    :return:
        The object returned by ``Config.from_toml`` with ``cls`` as the class to build.
    """
    base_loader = Config.from_toml

    return base_loader(fp, cls, *args, **kwargs)
Instantiates a Config object from a toml file
Parameters
- fp: File path of the Config TOML file
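- field_class: Class of the Config object to be instantiated (supplied automatically as the class this method is called on; not a caller-facing parameter of this override)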
- args: Arguments to be passed to Config
- kwargs: Keyword arguments to be passed
Returns
An instantiated and validated Config object.
@dataclass(init=True)
class ElasticsearchConfig(Config):
    """
    Basic Elasticsearch configuration file settings from the master nir.toml file
    """
    protocol: str
    ip: str
    port: str
    timeout: int

    def validate(self):
        """
        Checks if Elasticsearch URL is correct

        :raises AssertionError: If the protocol is not http/https or the port is not numeric.
        """
        assert self.protocol in ['http', 'https'], \
            f"Unsupported protocol: {self.protocol!r}"
        # The port may arrive as an int (e.g. an unquoted TOML value); normalise to
        # str so the digit check does not fail with AttributeError
        assert str(self.port).isdigit(), f"Port must be numeric, got {self.port!r}"
Basic Elasticsearch configuration file settings from the master nir.toml file
@dataclass
class SolrConfig(ElasticsearchConfig):
    """
    Solr settings read from the master nir.toml file.

    Adds nothing to ElasticsearchConfig: same fields, same validation.
    """
Basic Solr configuration file settings from the master nir.toml file
@dataclass(init=True)
class MetricsConfig(Config):
    """
    Basic Metrics configuration file settings from the master nir.toml file
    """
    metrics: List[str]

    def validate(self):
        """
        Checks if each metric is usable by evaluator classes.

        Every entry must look like ``<alphabetic name>@<numeric depth>``.

        :raises AssertionError: If an entry does not have that form.
        """
        for entry in self.metrics:
            # partition never raises on extra "@" characters; the surplus ends up
            # in ``depth`` and is rejected by the digit check below
            name, separator, depth = entry.partition("@")

            assert separator, f"Metric {entry!r} is missing '@'"
            assert name.isalpha(), f"Metric name in {entry!r} must be alphabetic"
            assert depth.isdigit(), f"Metric depth in {entry!r} must be numeric"
Basic Metrics configuration file settings from the master nir.toml file
def validate(self):
    """
    Checks if each metric is usable by evaluator classes.

    Every entry must look like ``<alphabetic name>@<numeric depth>``.

    :raises AssertionError: If an entry does not have that form.
    """
    for entry in self.metrics:
        # partition never raises on extra "@" characters; the surplus ends up
        # in ``depth`` and is rejected by the digit check below
        name, separator, depth = entry.partition("@")

        assert separator, f"Metric {entry!r} is missing '@'"
        assert name.isalpha(), f"Metric name in {entry!r} must be alphabetic"
        assert depth.isdigit(), f"Metric depth in {entry!r} must be numeric"
Checks if each metric is usable by evaluator classes
@dataclass
class NIRConfig(Config):
    """
    NIR settings read from the master nir.toml file.
    """
    norm_weight: str
    evaluate: bool
    return_size: int
    output_directory: str

    def validate(self):
        """
        Performs no checks and reports the config as valid.
        """
        is_valid = True

        return is_valid
Basic NIR configuration file settings from the master nir.toml file
def apply_config(func):
    """
    Configuration decorator.

    :param func: Decorated function
    :return:
        Wrapper that overlays ``self.config`` onto the keyword arguments of each call.
    """
    import functools

    # wraps keeps the decorated method's name and docstring on the wrapper
    @functools.wraps(func)
    def use_config(self, *args, **kwargs):
        """
        Replaces keywords passed to the function with ones from self.config.

        :param self: Object exposing a ``config`` attribute
        :param args: Passed through unchanged
        :param kwargs: To be updated
        :return:
            Whatever the decorated function returns.
        """
        if self.config is not None:
            kwargs = self.config.__update__(**kwargs)

        return func(self, *args, **kwargs)

    return use_config
Configuration decorator.
Parameters
- func: Decorated function
Returns
def override_with_toml_config(func):
    """
    Configuration decorator. Overwrite a function's kwargs with a specified toml config file.
    Pass override_with_config_=path/to/config

    :param func: Decorated function
    :return:
        Wrapper accepting an extra leading ``override_with_config_`` parameter.
    """
    import functools

    # wraps keeps the decorated function's name and docstring on the wrapper
    @functools.wraps(func)
    def override_with(override_with_config_: str = None, *args, **kwargs):
        """
        Replaces keywords passed to the function with ones loaded from a toml file.

        A path that does not exist is ignored and the call proceeds unchanged.

        :param override_with_config_: Path to config else None
        :param args: Passed through unchanged
        :param kwargs: To be updated. A ``override_<function name>_with_config_``
            entry is consumed here and used as the config path.
        :return:
            Whatever the decorated function returns.
        """
        named_key = f"override_{func.__name__}_with_config_"

        if named_key in kwargs:
            # Use the value stored under the key (not the key name itself) as the path
            named_path = kwargs.pop(named_key)
            if named_path is not None:
                override_with_config_ = named_path

        if override_with_config_ is not None and os.path.exists(override_with_config_):
            # dict.update returns None, so update in place rather than rebinding kwargs
            kwargs.update(toml.load(override_with_config_))

        return func(*args, **kwargs)

    return override_with
Configuration decorator. Overwrite a function's kwargs and args with a specified toml config file. Pass override_with_config_=path/to/config
Parameters
- func: Decorated function
Returns
def save_kwargs_to_file(func):
    """
    Configuration decorator. Saves the keyword arguments of a call to a toml file.

    :param func: Decorated function
    :return:
        Wrapper accepting an extra leading ``save_kwargs_to_`` parameter.
    """
    import functools

    # wraps keeps the decorated function's name and docstring on the wrapper
    @functools.wraps(func)
    def save_kwargs(save_kwargs_to_: str = None, *args, **kwargs):
        """
        Save kwargs passed to the function, output_file = f"{save_kwargs_to_}/{func.__name__}.toml"

        :param save_kwargs_to_: Path to save location for config else None. This should be a DIRECTORY.
        :param args: Passed through unchanged
        :param kwargs: Saved to the file and passed through unchanged
        :return:
            Whatever the decorated function returns.
        """
        if save_kwargs_to_ is not None:
            # makedirs either leaves the directory in place or raises, so no
            # separate existence check is needed afterwards
            os.makedirs(save_kwargs_to_, exist_ok=True)

            output_file = f"{save_kwargs_to_}/{func.__name__}.toml"
            loguru.logger.info(f"Saving kwargs to {output_file}")

            # Context manager closes the handle even if serialisation fails
            with open(output_file, "w+") as config_file:
                toml.dump(kwargs, config_file)

        return func(*args, **kwargs)

    return save_kwargs