debeir.core.config

  1import abc
  2import dataclasses
  3import os
  4from abc import ABC
  5from dataclasses import dataclass
  6from pathlib import Path
  7from typing import Dict, List, MutableMapping, Union
  8
  9import loguru
 10import toml
 11
 12
 13class Config:
 14    """
 15    Config Interface with creation class methods
 16    """
 17
 18    def __update__(self, **kwargs):
 19        attrs = vars(self)
 20        kwargs.update(attrs)
 21
 22        return kwargs
 23
 24    @classmethod
 25    def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
 26        """
 27        Instantiates a Config object from a toml file
 28
 29        :param fp: File path of the Config TOML file
 30        :param field_class: Class of the Config object to be instantiated
 31        :param args: Arguments to be passed to Config
 32        :param kwargs: Keyword arguments to be passed
 33        :return:
 34            A instantiated and validated Config object.
 35        """
 36        args_dict = toml.load(fp)
 37
 38        return cls.from_args(args_dict, field_class, *args, **kwargs)
 39
 40    @classmethod
 41    def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
 42        """
 43        Instantiates a Config object from arguments
 44
 45
 46        :param args_dict:
 47        :param field_class:
 48        :param args:
 49        :param kwargs:
 50        :return:
 51        """
 52        from debeir.rankers.transformer_sent_encoder import Encoder
 53
 54        field_names = set(f.name for f in dataclasses.fields(field_class))
 55        obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})
 56        if hasattr(obj, 'encoder_fp') and obj.encoder_fp:
 57            obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)
 58
 59        obj.validate()
 60
 61        return obj
 62
 63    @classmethod
 64    def from_dict(cls, data_class, **kwargs):
 65        """
 66        Instantiates a Config object from a dictionary
 67
 68        :param data_class:
 69        :param kwargs:
 70        :return:
 71        """
 72        from debeir.rankers.transformer_sent_encoder import Encoder
 73
 74        if "encoder_fp" in kwargs and kwargs["encoder_fp"]:
 75            kwargs["encoder"] = Encoder(kwargs["encoder_fp"])
 76
 77        field_names = set(f.name for f in dataclasses.fields(data_class))
 78        obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
 79        obj.validate(0)
 80
 81        return obj
 82
 83    @abc.abstractmethod
 84    def validate(self):
 85        """
 86        Validates if the config is correct.
 87        Must be implemented by inherited classes.
 88        """
 89        pass
 90
 91
 92@dataclass(init=True, unsafe_hash=True)
 93class GenericConfig(Config, ABC):
 94    """
 95    Generic NIR Configuration file for which all configs will inherit
 96    """
 97    query_type: str
 98    index: str = None
 99    encoder_normalize: bool = True
100    ablations: bool = False
101    norm_weight: float = None
102    automatic: bool = None
103    encoder: object = None
104    encoder_fp: str = None
105    query_weights: List[float] = None
106    cosine_weights: List[float] = None
107    evaluate: bool = False
108    qrels: str = None
109    config_fn: str = None
110    query_fn: str = None
111    parser_fn: str = None
112    executor_fn: str = None
113    cosine_ceiling: float = None
114    topics_path: str = None
115    return_id_only: bool = False
116    overwrite_output_if_exists: bool = False
117    output_file: str = None
118    run_name: str = None
119
120    @classmethod
121    def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
122        return Config.from_toml(fp, cls, *args, **kwargs)
123
124
@dataclass(init=True)
class _NIRMasterConfig(Config):
    """
    Base NIR Master config: nir.toml
    """
    metrics: Dict
    search: Dict
    nir: Dict

    def get_metrics(self, key='common', return_as_instance=False):
        """
        Look up the metrics section stored under ``key``.

        :param key: Key into the ``metrics`` mapping
        :param return_as_instance: If true, build a MetricsConfig from the section
        :return:
            The raw section, or a MetricsConfig when ``return_as_instance`` is set.
        """
        section = self.metrics[key]
        if not return_as_instance:
            return section

        return MetricsConfig.from_args(section, MetricsConfig)

    def get_search_engine_settings(self, key='elasticsearch', return_as_instance=False):
        """
        Look up the search engine settings stored under ``search['engines'][key]``.

        :param key: Key into the ``engines`` mapping of the search section
        :param return_as_instance: If true, build an ElasticsearchConfig from the settings
        :return:
            The raw settings, or an ElasticsearchConfig when ``return_as_instance`` is set.
        """
        section = self.search['engines'][key]
        if not return_as_instance:
            return section

        return ElasticsearchConfig.from_args(section, ElasticsearchConfig)

    def get_nir_settings(self, key='default_settings', return_as_instance=False):
        """
        Look up the NIR settings stored under ``key``.

        :param key: Key into the ``nir`` mapping
        :param return_as_instance: If true, build a NIRConfig from the settings
        :return:
            The raw settings, or a NIRConfig when ``return_as_instance`` is set.
        """
        section = self.nir[key]
        if not return_as_instance:
            return section

        return NIRConfig.from_args(section, NIRConfig)

    def validate(self):
        """
        No checks are performed for the master config; always returns True.
        """
        return True
158
159
@dataclass(init=True)
class ElasticsearchConfig(Config):
    """
    Elasticsearch connection settings taken from the master nir.toml file
    """
    protocol: str
    ip: str
    port: str
    timeout: int

    def validate(self):
        """
        Asserts that the protocol is http or https and that the port is all digits
        """
        allowed_protocols = ['http', 'https']
        assert self.protocol in allowed_protocols

        port_is_numeric = self.port.isdigit()
        assert port_is_numeric
176
177
@dataclass(init=True)
class SolrConfig(ElasticsearchConfig):
    """
    Solr connection settings taken from the master nir.toml file.

    Adds nothing to ElasticsearchConfig: same fields, same validation.
    """
184
185
@dataclass(init=True)
class MetricsConfig(Config):
    """
    Metrics settings taken from the master nir.toml file
    """
    metrics: List[str]

    def validate(self):
        """
        Asserts that every metric has the form ``<alphabetic name>@<numeric depth>``
        """
        for entry in self.metrics:
            assert "@" in entry

            # Unpacking into exactly two parts: an entry with more than one
            # "@" raises ValueError here.
            name, depth = entry.split("@")

            assert name.isalpha()
            assert depth.isdigit()
204
205
@dataclass(init=True)
class NIRConfig(Config):
    """
    NIR settings taken from the master nir.toml file
    """
    norm_weight: str
    evaluate: bool
    return_size: int
    output_directory: str

    def validate(self):
        """
        No checks are performed for this config; always returns True.
        """
        return True
218
219
def apply_config(func):
    """
    Decorator for methods of objects that carry a ``config`` attribute.

    :param func: Decorated function
    :return:
        A wrapper that overlays ``self.config`` onto the keyword arguments.
    """

    def use_config(self, *args, **kwargs):
        """
        Calls the decorated function with kwargs overlaid by ``self.config.__update__``.

        :param self: Object whose ``config`` attribute is consulted
        :param args: Positional arguments, passed through unchanged
        :param kwargs: Keyword arguments, replaced by the merged result when a config is set
        :return:
            Whatever the decorated function returns.
        """
        config = self.config
        if config is None:
            # No config attached: call through with the arguments as given.
            return func(self, *args, **kwargs)

        merged = config.__update__(**kwargs)

        return func(self, *args, **merged)

    return use_config
243
244
def override_with_toml_config(func):
    """
    Configuration decorator. Overwrite a function's kwargs with values from a specified toml config file.

    The config path is taken from the wrapper's first positional argument
    (``override_with_config_``) or from the keyword
    ``override_<function name>_with_config_``; the keyword wins when both are given.

    :param func: Decorated function
    :return:
        A wrapper that merges the toml file's contents into the keyword arguments.
    """
    # Per-function keyword that may carry the config path.
    named_key = f"override_{func.__name__}_with_config_"

    def override_with(override_with_config_: str = None, *args, **kwargs):
        """
        Replaces keyword arguments passed to the function with ones from the toml config file.

        :param override_with_config_: Path to config else None
        :param args: Positional arguments, passed through unchanged
        :param kwargs: Keyword arguments; entries from the toml file overwrite same-named ones
        :return:
            Whatever the decorated function returns.
        """
        if named_key in kwargs:
            # Use the value supplied by the caller (previously the key name
            # itself was used as the path) and keep the routing keyword out of
            # the arguments handed to ``func``.
            override_with_config_ = kwargs.pop(named_key)

        if override_with_config_ is not None and os.path.exists(override_with_config_):
            # dict.update mutates in place and returns None, so its result
            # must not be assigned back to kwargs.
            kwargs.update(toml.load(override_with_config_))

        return func(*args, **kwargs)

    return override_with
275
276
def save_kwargs_to_file(func):
    """
    Decorator that dumps the keyword arguments of a call to a toml file before calling ``func``.

    :param func: Decorated function
    :return:
        A wrapper taking the save directory as its first positional argument.
    """

    def save_kwargs(save_kwargs_to_: str = None, *args, **kwargs):
        """
        Save kwargs passed to the function, output_file = f"{save_kwargs_to_}/{func.__name__}.toml"

        :param save_kwargs_to_: Path to save location for config else None. This should be a DIRECTORY.
        :param args: Positional arguments, passed through unchanged
        :param kwargs: Keyword arguments; written to the toml file and passed through unchanged
        :return:
            Whatever the decorated function returns.
        """
        if save_kwargs_to_ is not None:
            # makedirs either leaves the directory in place or raises, so no
            # separate existence check is needed afterwards.
            os.makedirs(save_kwargs_to_, exist_ok=True)

            output_file = f"{save_kwargs_to_}/{func.__name__}.toml"
            loguru.logger.info(f"Saving kwargs to {output_file}")
            # Context manager so the file handle is closed after writing.
            with open(output_file, "w+") as handle:
                toml.dump(kwargs, handle)

        return func(*args, **kwargs)

    return save_kwargs
class Config:
class Config:
    """
    Config interface with creation class methods.

    The constructors below build instances of a dataclass (``field_class`` /
    ``data_class``) and call its ``validate`` method before returning it.
    Note that this class does not use ``abc.ABCMeta`` itself, so the abstract
    ``validate`` is only enforced on subclasses that also inherit from ``abc.ABC``.
    """

    def __update__(self, **kwargs):
        """
        Overlay this config's instance attributes onto ``kwargs``.

        Attributes stored on the config win over same-named entries in ``kwargs``.

        :param kwargs: Keyword arguments to be overlaid with the config attributes
        :return:
            The merged keyword-argument dictionary.
        """
        kwargs.update(vars(self))

        return kwargs

    @classmethod
    def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
        """
        Instantiates a Config object from a toml file

        :param fp: File path of the Config TOML file
        :param field_class: Class of the Config object to be instantiated
        :param args: Arguments to be passed to Config
        :param kwargs: Keyword arguments to be passed
        :return:
            An instantiated and validated Config object.
        """
        args_dict = toml.load(fp)

        return cls.from_args(args_dict, field_class, *args, **kwargs)

    @classmethod
    def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
        """
        Instantiates a Config object from a mapping of arguments

        Keys of ``args_dict`` that are not dataclass fields of ``field_class``
        are ignored.

        :param args_dict: Mapping of field name to value
        :param field_class: Dataclass type of the Config object to be instantiated
        :param args: Accepted for interface compatibility; not used here
        :param kwargs: Accepted for interface compatibility; not used here
        :return:
            An instantiated and validated ``field_class`` object.
        """
        field_names = {f.name for f in dataclasses.fields(field_class)}
        obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})

        if getattr(obj, 'encoder_fp', None):
            # Imported lazily: the encoder module is only needed when an
            # encoder path is actually configured.
            from debeir.rankers.transformer_sent_encoder import Encoder

            obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)

        obj.validate()

        return obj

    @classmethod
    def from_dict(cls, data_class, **kwargs):
        """
        Instantiates a Config object from keyword arguments

        Keyword arguments that are not dataclass fields of ``data_class`` are ignored.

        :param data_class: Dataclass type of the Config object to be instantiated
        :param kwargs: Field values for the Config object
        :return:
            An instantiated and validated ``data_class`` object.
        """
        if kwargs.get("encoder_fp"):
            # Imported lazily: the encoder module is only needed when an
            # encoder path is actually configured.
            from debeir.rankers.transformer_sent_encoder import Encoder

            # NOTE(review): unlike from_args, encoder_normalize is not forwarded
            # to Encoder here -- confirm whether that is intentional.
            kwargs["encoder"] = Encoder(kwargs["encoder_fp"])

        field_names = {f.name for f in dataclasses.fields(data_class)}
        obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
        # validate() takes no arguments; passing one raised TypeError.
        obj.validate()

        return obj

    @abc.abstractmethod
    def validate(self):
        """
        Validates if the config is correct.
        Must be implemented by inherited classes.
        """
        pass

Config Interface with creation class methods

Config()
@classmethod
def from_toml( cls, fp: Union[str, pathlib.Path], field_class, *args, **kwargs) -> debeir.core.config.Config:
25    @classmethod
26    def from_toml(cls, fp: Union[str, Path], field_class, *args, **kwargs) -> 'Config':
27        """
28        Instantiates a Config object from a toml file
29
30        :param fp: File path of the Config TOML file
31        :param field_class: Class of the Config object to be instantiated
32        :param args: Arguments to be passed to Config
33        :param kwargs: Keyword arguments to be passed
34        :return:
35            A instantiated and validated Config object.
36        """
37        args_dict = toml.load(fp)
38
39        return cls.from_args(args_dict, field_class, *args, **kwargs)

Instantiates a Config object from a toml file

Parameters
  • fp: File path of the Config TOML file
  • field_class: Class of the Config object to be instantiated
  • args: Arguments to be passed to Config
  • kwargs: Keyword arguments to be passed
Returns
An instantiated and validated Config object.
@classmethod
def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
41    @classmethod
42    def from_args(cls, args_dict: MutableMapping, field_class, *args, **kwargs):
43        """
44        Instantiates a Config object from arguments
45
46
47        :param args_dict:
48        :param field_class:
49        :param args:
50        :param kwargs:
51        :return:
52        """
53        from debeir.rankers.transformer_sent_encoder import Encoder
54
55        field_names = set(f.name for f in dataclasses.fields(field_class))
56        obj = field_class(**{k: v for k, v in args_dict.items() if k in field_names})
57        if hasattr(obj, 'encoder_fp') and obj.encoder_fp:
58            obj.encoder = Encoder(obj.encoder_fp, obj.encoder_normalize)
59
60        obj.validate()
61
62        return obj

Instantiates a Config object from arguments

Parameters
  • args_dict:
  • field_class:
  • args:
  • kwargs:
Returns
@classmethod
def from_dict(cls, data_class, **kwargs):
64    @classmethod
65    def from_dict(cls, data_class, **kwargs):
66        """
67        Instantiates a Config object from a dictionary
68
69        :param data_class:
70        :param kwargs:
71        :return:
72        """
73        from debeir.rankers.transformer_sent_encoder import Encoder
74
75        if "encoder_fp" in kwargs and kwargs["encoder_fp"]:
76            kwargs["encoder"] = Encoder(kwargs["encoder_fp"])
77
78        field_names = set(f.name for f in dataclasses.fields(data_class))
79        obj = data_class(**{k: v for k, v in kwargs.items() if k in field_names})
80        obj.validate(0)
81
82        return obj

Instantiates a Config object from a dictionary

Parameters
  • data_class:
  • kwargs:
Returns
@abc.abstractmethod
def validate(self):
84    @abc.abstractmethod
85    def validate(self):
86        """
87        Validates if the config is correct.
88        Must be implemented by inherited classes.
89        """
90        pass

Validates if the config is correct. Must be implemented by inherited classes.

@dataclass(init=True, unsafe_hash=True)
class GenericConfig(Config, abc.ABC):
 93@dataclass(init=True, unsafe_hash=True)
 94class GenericConfig(Config, ABC):
 95    """
 96    Generic NIR Configuration file for which all configs will inherit
 97    """
 98    query_type: str
 99    index: str = None
100    encoder_normalize: bool = True
101    ablations: bool = False
102    norm_weight: float = None
103    automatic: bool = None
104    encoder: object = None
105    encoder_fp: str = None
106    query_weights: List[float] = None
107    cosine_weights: List[float] = None
108    evaluate: bool = False
109    qrels: str = None
110    config_fn: str = None
111    query_fn: str = None
112    parser_fn: str = None
113    executor_fn: str = None
114    cosine_ceiling: float = None
115    topics_path: str = None
116    return_id_only: bool = False
117    overwrite_output_if_exists: bool = False
118    output_file: str = None
119    run_name: str = None
120
121    @classmethod
122    def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
123        return Config.from_toml(fp, cls, *args, **kwargs)

Generic NIR Configuration file for which all configs will inherit

GenericConfig( query_type: str, index: str = None, encoder_normalize: bool = True, ablations: bool = False, norm_weight: float = None, automatic: bool = None, encoder: object = None, encoder_fp: str = None, query_weights: List[float] = None, cosine_weights: List[float] = None, evaluate: bool = False, qrels: str = None, config_fn: str = None, query_fn: str = None, parser_fn: str = None, executor_fn: str = None, cosine_ceiling: float = None, topics_path: str = None, return_id_only: bool = False, overwrite_output_if_exists: bool = False, output_file: str = None, run_name: str = None)
@classmethod
def from_toml( cls, fp: Union[str, pathlib.Path], *args, **kwargs) -> debeir.core.config.GenericConfig:
121    @classmethod
122    def from_toml(cls, fp: Union[str, Path], *args, **kwargs) -> 'GenericConfig':
123        return Config.from_toml(fp, cls, *args, **kwargs)

Instantiates a Config object from a toml file

Parameters
  • fp: File path of the Config TOML file
  • field_class: Class of the Config object to be instantiated
  • args: Arguments to be passed to Config
  • kwargs: Keyword arguments to be passed
Returns
An instantiated and validated Config object.
Inherited Members
Config
from_args
from_dict
validate
@dataclass(init=True)
class ElasticsearchConfig(Config):
161@dataclass(init=True)
162class ElasticsearchConfig(Config):
163    """
164    Basic Elasticsearch configuration file settings from the master nir.toml file
165    """
166    protocol: str
167    ip: str
168    port: str
169    timeout: int
170
171    def validate(self):
172        """
173        Checks if Elasticsearch URL is correct
174        """
175        assert self.protocol in ['http', 'https']
176        assert self.port.isdigit()

Basic Elasticsearch configuration file settings from the master nir.toml file

ElasticsearchConfig(protocol: str, ip: str, port: str, timeout: int)
def validate(self):
171    def validate(self):
172        """
173        Checks if Elasticsearch URL is correct
174        """
175        assert self.protocol in ['http', 'https']
176        assert self.port.isdigit()

Checks if Elasticsearch URL is correct

Inherited Members
Config
from_toml
from_args
from_dict
@dataclass(init=True)
class SolrConfig(ElasticsearchConfig):
179@dataclass(init=True)
180class SolrConfig(ElasticsearchConfig):
181    """
182    Basic Solr configuration file settings from the master nir.toml file
183    """
184    pass

Basic Solr configuration file settings from the master nir.toml file

SolrConfig(protocol: str, ip: str, port: str, timeout: int)
@dataclass(init=True)
class MetricsConfig(Config):
187@dataclass(init=True)
188class MetricsConfig(Config):
189    """
190    Basic Metrics configuration file settings from the master nir.toml file
191    """
192    metrics: List[str]
193
194    def validate(self):
195        """
196        Checks if each Metrics is usable by evaluator classes
197        """
198        for metric in self.metrics:
199            assert "@" in metric
200
201            metric, depth = metric.split("@")
202
203            assert metric.isalpha()
204            assert depth.isdigit()

Basic Metrics configuration file settings from the master nir.toml file

MetricsConfig(metrics: List[str])
def validate(self):
194    def validate(self):
195        """
196        Checks if each Metrics is usable by evaluator classes
197        """
198        for metric in self.metrics:
199            assert "@" in metric
200
201            metric, depth = metric.split("@")
202
203            assert metric.isalpha()
204            assert depth.isdigit()

Checks if each Metrics is usable by evaluator classes

Inherited Members
Config
from_toml
from_args
from_dict
@dataclass(init=True)
class NIRConfig(Config):
207@dataclass(init=True)
208class NIRConfig(Config):
209    """
210    Basic NIR configuration file settings from the master nir.toml file
211    """
212    norm_weight: str
213    evaluate: bool
214    return_size: int
215    output_directory: str
216
217    def validate(self):
218        return True

Basic NIR configuration file settings from the master nir.toml file

NIRConfig( norm_weight: str, evaluate: bool, return_size: int, output_directory: str)
def validate(self):
217    def validate(self):
218        return True

Validates if the config is correct. Must be implemented by inherited classes.

Inherited Members
Config
from_toml
from_args
from_dict
def apply_config(func):
221def apply_config(func):
222    """
223    Configuration decorator.
224
225    :param func: Decorated function
226    :return:
227    """
228
229    def use_config(self, *args, **kwargs):
230        """
231        Replaces keywords and args passed to the function with ones from self.config.
232
233        :param self:
234        :param args: To be updated
235        :param kwargs: To be updated
236        :return:
237        """
238        if self.config is not None:
239            kwargs = self.config.__update__(**kwargs)
240
241        return func(self, *args, **kwargs)
242
243    return use_config

Configuration decorator.

Parameters
  • func: Decorated function
Returns
def override_with_toml_config(func):
def override_with_toml_config(func):
    """
    Configuration decorator. Overwrite a function's kwargs with values from a specified toml config file.

    The config path is taken from the wrapper's first positional argument
    (``override_with_config_``) or from the keyword
    ``override_<function name>_with_config_``; the keyword wins when both are given.

    :param func: Decorated function
    :return:
        A wrapper that merges the toml file's contents into the keyword arguments.
    """
    # Per-function keyword that may carry the config path.
    named_key = f"override_{func.__name__}_with_config_"

    def override_with(override_with_config_: str = None, *args, **kwargs):
        """
        Replaces keyword arguments passed to the function with ones from the toml config file.

        :param override_with_config_: Path to config else None
        :param args: Positional arguments, passed through unchanged
        :param kwargs: Keyword arguments; entries from the toml file overwrite same-named ones
        :return:
            Whatever the decorated function returns.
        """
        if named_key in kwargs:
            # Use the value supplied by the caller (previously the key name
            # itself was used as the path) and keep the routing keyword out of
            # the arguments handed to ``func``.
            override_with_config_ = kwargs.pop(named_key)

        if override_with_config_ is not None and os.path.exists(override_with_config_):
            # dict.update mutates in place and returns None, so its result
            # must not be assigned back to kwargs.
            kwargs.update(toml.load(override_with_config_))

        return func(*args, **kwargs)

    return override_with

Configuration decorator. Overwrite a function's kwargs and args with a specified toml config file. Pass override_with_config=path/to/config

Parameters
  • func: Decorated function
Returns
def save_kwargs_to_file(func):
def save_kwargs_to_file(func):
    """
    Decorator that dumps the keyword arguments of a call to a toml file before calling ``func``.

    :param func: Decorated function
    :return:
        A wrapper taking the save directory as its first positional argument.
    """

    def save_kwargs(save_kwargs_to_: str = None, *args, **kwargs):
        """
        Save kwargs passed to the function, output_file = f"{save_kwargs_to_}/{func.__name__}.toml"

        :param save_kwargs_to_: Path to save location for config else None. This should be a DIRECTORY.
        :param args: Positional arguments, passed through unchanged
        :param kwargs: Keyword arguments; written to the toml file and passed through unchanged
        :return:
            Whatever the decorated function returns.
        """
        if save_kwargs_to_ is not None:
            # makedirs either leaves the directory in place or raises, so no
            # separate existence check is needed afterwards.
            os.makedirs(save_kwargs_to_, exist_ok=True)

            output_file = f"{save_kwargs_to_}/{func.__name__}.toml"
            loguru.logger.info(f"Saving kwargs to {output_file}")
            # Context manager so the file handle is closed after writing.
            with open(output_file, "w+") as handle:
                toml.dump(kwargs, handle)

        return func(*args, **kwargs)

    return save_kwargs