debeir.utils.utils
import inspect
import os
import sys
from collections.abc import MutableMapping

import loguru


def create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
    """
    Create output file based on config instructions

    When ``output_file`` is None, the directory ``{output_directory}/{config.index}`` is created
    and the path defaults to the config file's base name (with ``.toml`` removed) inside it.
    If the path already exists and ``remove`` is falsy, the process exits with status 0.
    If ``remove`` is truthy, the file is created or truncated to empty.

    :param config: The config object with output file options (``index`` and ``query_type`` are read).
    :param config_fp: The config file path used in default naming options for the output file.
    :param remove: Overwrites the output file if it exists
    :param output_file: The output file path, or None to derive a default one
    :param output_directory: The output directory used for default naming (specified in nir config)
    :param kwargs: Compatibility arguments (ignored)
    :return:
        The output file path that was given or derived.
    :raises AssertionError: If ``config.query_type`` is falsy.
    """

    if output_file is None:
        os.makedirs(name=f"{output_directory}/{config.index}", exist_ok=True)
        output_file = (
            f"{output_directory}/{config.index}/{config_fp.split('/')[-1].replace('.toml', '')}"
        )
        loguru.logger.info(f"Output file not specified, writing to: {output_file}")

    # Never clobber existing results unless the caller explicitly asked for it.
    if os.path.exists(output_file) and not remove:
        loguru.logger.info(f"Output file exists: {output_file}. Exiting...")
        sys.exit(0)

    if remove:
        loguru.logger.info(f"Output file exists: {output_file}. Overwriting...")
        # Opening in "w+" mode truncates the file (or creates it when missing).
        open(output_file, "w+").close()

    assert (
        config.query_type
    ), "At least config or argument must be provided for query type"

    return output_file


async def unpack_coroutine(f):
    """
    Recursively unwraps co-routines until a result is reached.

    :param f: Wrapped co-routine function.
    :return:
        Results from the (final) evaluated co-routine.
    """
    res = await f
    # An awaited result may itself be awaitable; keep awaiting until it is a plain value.
    while inspect.isawaitable(res):
        res = await res

    return res


def flatten(d, parent_key="", sep="_"):
    """
    Flattens a multidimensional dictionary (dictionary of dictionaries) to a single layer with child keys
    separated by "sep"

    :param d: Multi-level dictionary to flatten.
    :param parent_key: Prepend a parent_key to all layers.
    :param sep: Separator token between child and parent layers.
    :return:
        A flattened 1-D dictionary with keys separated by *sep*.
    """
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            # Keep the leaf value itself; storing None here would discard the data.
            items.append((new_key, v))
    return dict(items)


def remove_excess_whitespace(s):
    """
    Normalise Windows line breaks and collapse runs of spaces.

    A blank line (``\\r\\n\\r\\n``) becomes a single ``\\n``, any remaining ``\\r\\n`` becomes a
    space, and consecutive spaces are then reduced to a single space.

    :param s: The string to clean up.
    :return:
        The cleaned string.
    """
    s = s.replace("\r\n\r\n", "\n")
    s = s.replace("\r\n", " ")

    # Collapse runs of spaces: replace two consecutive spaces with one until
    # no double space remains. The search string must be TWO spaces, otherwise
    # the loop never terminates on any input containing a space.
    while "  " in s:
        s = s.replace("  ", " ")

    return s
def
create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
def create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
    """
    Resolve, and optionally reset, the file that results are written to.

    If no output file is supplied, a default is derived: the directory
    ``{output_directory}/{config.index}`` is created and the file is named after the
    config file (last ``/``-separated component with ``.toml`` removed).

    :param config: Config object; its ``index`` and ``query_type`` attributes are read.
    :param config_fp: Path of the config file, used only for default naming.
    :param remove: When truthy, the output file is created or truncated to empty.
    :param output_file: Explicit output path, or None to use the derived default.
    :param output_directory: Base directory used for the derived default path.
    :param kwargs: Compatibility arguments (ignored).
    :return:
        The explicit or derived output file path.
    :raises AssertionError: If ``config.query_type`` is falsy.
    """
    if output_file is None:
        target_dir = f"{output_directory}/{config.index}"
        os.makedirs(name=target_dir, exist_ok=True)
        run_name = config_fp.split('/')[-1].replace('.toml', '')
        output_file = f"{target_dir}/{run_name}"
        loguru.logger.info(f"Output file not specified, writing to: {output_file}")

    if remove:
        loguru.logger.info(f"Output file exists: {output_file}. Overwriting...")
        # "w+" truncates an existing file or creates a new empty one.
        handle = open(output_file, "w+")
        handle.close()
    elif os.path.exists(output_file):
        # Existing results are kept; stop with a success status instead.
        loguru.logger.info(f"Output file exists: {output_file}. Exiting...")
        sys.exit(0)

    assert (
        config.query_type
    ), "At least config or argument must be provided for query type"

    return output_file
Create output file based on config instructions
Parameters
- config: The config object with output file options.
- config_fp: The config file path used in default naming options for the output file.
- remove: If true, overwrite (truncate) the output file instead of exiting when it already exists.
- output_file: The output file path; if None, a default path is derived from the output directory, the config's index and the config file name.
- output_directory: The output directory used for default naming (specified in nir config)
- kwargs: Compatibility arguments
Returns
The output file path (either the one given or the derived default).
async def
unpack_coroutine(f):
async def unpack_coroutine(f):
    """
    Await *f*, then keep awaiting whatever comes back for as long as it is awaitable.

    :param f: Wrapped co-routine function.
    :return:
        The first non-awaitable value produced by the chain of awaits.
    """
    outcome = await f
    while True:
        if not inspect.isawaitable(outcome):
            # Reached a plain value: nothing left to unwrap.
            return outcome
        outcome = await outcome
Recursively unwraps co-routines until a result is reached.
Parameters
- f: Wrapped co-routine function.
Returns
Results from the (final) evaluated co-routine.
def
flatten(d, parent_key='', sep='_'):
def flatten(d, parent_key="", sep="_"):
    """
    Flattens a multidimensional dictionary (dictionary of dictionaries) to a single layer with child keys
    separated by "sep"

    Only values that are ``MutableMapping`` instances are descended into; every other value is
    treated as a leaf and copied to the result under its joined key.

    :param d: Multi-level dictionary to flatten.
    :param parent_key: Prepend a parent_key to all layers.
    :param sep: Separator token between child and parent layers.
    :return:
        A flattened 1-D dictionary with keys separated by *sep*.
    """
    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            # Store the leaf value itself (previously None was stored, losing the data).
            flat[new_key] = v
    return flat
Flattens a multidimensional dictionary (dictionary of dictionaries) to a single layer with child keys separated by "sep"
Parameters
- d: Multi-level dictionary to flatten.
- parent_key: Prepend a parent_key to all layers.
- sep: Separator token between child and parent layers.
Returns
A flattened 1-D dictionary with keys separated by *sep*.
def
remove_excess_whitespace(s):