debeir.utils.utils

 1import inspect
 2import os
 3import sys
 4from collections.abc import MutableMapping
 5
 6import loguru
 7
 8
 9def create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
10    """
11    Create output file based on config instructions
12
13    :param config: The config object with output file options.
14    :param config_fp: The config file path used in default naming options for the output file.
15    :param remove: Overwrites the output file if it exists
16    :param output_file: The output file path if it exists
17    :param output_directory: The output directory used for default naming (specified in nir config)
18    :param kwargs: Compatibility arguments
19    :return:
20    """
21
22    if output_file is None:
23        os.makedirs(name=f"{output_directory}/{config.index}", exist_ok=True)
24        output_file = (
25            f"{output_directory}/{config.index}/{config_fp.split('/')[-1].replace('.toml', '')}"
26        )
27        loguru.logger.info(f"Output file not specified, writing to: {output_file}")
28
29    if os.path.exists(output_file) and not remove:
30        loguru.logger.info(f"Output file exists: {output_file}. Exiting...")
31        sys.exit(0)
32
33    if remove:
34        loguru.logger.info(f"Output file exists: {output_file}. Overwriting...")
35        open(output_file, "w+").close()
36
37    assert (
38        config.query_type
39    ), "At least config or argument must be provided for query type"
40
41    return output_file
42
43
async def unpack_coroutine(f):
    """
    Await *f*, then keep awaiting its result for as long as that result is itself awaitable.

    :param f: Awaitable whose result may be another awaitable.
    :return:
        The first non-awaitable value produced by the chain.
    """
    outcome = await f
    while True:
        # Stop as soon as the value can no longer be awaited.
        if not inspect.isawaitable(outcome):
            return outcome
        outcome = await outcome
57
58
def flatten(d, parent_key="", sep="_"):
    """
    Flatten a nested dictionary (dictionary of dictionaries) into a single level.

    Keys of nested mappings are joined to their parent key with *sep*; leaf values are kept
    unchanged. A nested mapping that is empty contributes no keys to the result.

    :param d: Multi-level dictionary to flatten.
    :param parent_key: Prefix prepended (followed by *sep*) to every key; empty for no prefix.
    :param sep: Separator token between parent and child keys.
    :return:
        A flattened 1-D dictionary with keys separated by *sep*.
    """
    items = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse, passing the accumulated key down as the prefix.
            items.update(flatten(v, new_key, sep=sep))
        else:
            # Keep the leaf value itself (previously every value was replaced by None).
            items[new_key] = v
    return items
79
80
def remove_excess_whitespace(s):
    """
    Normalise CRLF line breaks and collapse runs of spaces.

    A double CRLF becomes a single newline, each remaining CRLF becomes a space, and any
    run of two or more spaces is reduced to one space.

    :param s: The string to clean.
    :return: The cleaned string.
    """
    text = s.replace("\r\n\r\n", "\n").replace("\r\n", " ")

    # Halve runs of spaces until a pass no longer changes the text.
    while True:
        squeezed = text.replace("  ", " ")
        if squeezed == text:
            return text
        text = squeezed
def create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
def create_output_file(config, config_fp, remove, output_file, output_directory, **kwargs):
    """
    Resolve the output file path and prepare it for writing.

    When no output file is given, the default path
    ``{output_directory}/{config.index}/{config file name with ".toml" removed}`` is used
    and its parent directory is created.

    :param config: The config object; ``config.index`` is used for default naming and
        ``config.query_type`` must be truthy.
    :param config_fp: The config file path used in default naming options for the output file.
    :param remove: If truthy, the output file is created (or truncated) as an empty file.
    :param output_file: The output file path, or None to derive the default path.
    :param output_directory: The output directory used for default naming (specified in nir config)
    :param kwargs: Compatibility arguments (unused).
    :return: The resolved output file path.
    :raises AssertionError: If ``config.query_type`` is falsy.
    :raises SystemExit: With exit code 0 when the output file exists and ``remove`` is falsy.
    """

    if output_file is None:
        os.makedirs(name=f"{output_directory}/{config.index}", exist_ok=True)
        output_file = (
            f"{output_directory}/{config.index}/{config_fp.split('/')[-1].replace('.toml', '')}"
        )
        loguru.logger.info(f"Output file not specified, writing to: {output_file}")

    # Never clobber an existing result unless the caller explicitly asked for it.
    if os.path.exists(output_file) and not remove:
        loguru.logger.info(f"Output file exists: {output_file}. Exiting...")
        sys.exit(0)

    if remove:
        # This message is logged whenever remove is set, whether or not the file existed.
        loguru.logger.info(f"Output file exists: {output_file}. Overwriting...")
        # Opening in "w+" mode truncates (or creates) the file; the handle is closed at once.
        with open(output_file, "w+"):
            pass

    # Raised explicitly instead of using ``assert`` so the check survives ``python -O``.
    if not config.query_type:
        raise AssertionError("At least config or argument must be provided for query type")

    return output_file

Create output file based on config instructions

Parameters
  • config: The config object with output file options.
  • config_fp: The config file path used in default naming options for the output file.
  • remove: Overwrites the output file if it exists
  • output_file: The output file path if it exists
  • output_directory: The output directory used for default naming (specified in nir config)
  • kwargs: Compatibility arguments
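Returns
The output file path: the one supplied, or the default path built from the output directory, config index and config file name.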
async def unpack_coroutine(f):
async def unpack_coroutine(f):
    """
    Await *f*, then keep awaiting its result for as long as that result is itself awaitable.

    :param f: Awaitable whose result may be another awaitable.
    :return:
        The first non-awaitable value produced by the chain.
    """
    outcome = await f
    while True:
        # Stop as soon as the value can no longer be awaited.
        if not inspect.isawaitable(outcome):
            return outcome
        outcome = await outcome

Recursively unwraps co-routines until a result is reached.

Parameters
  • f: Wrapped co-routine function.
Returns
Results from the (final) evaluated co-routine.
def flatten(d, parent_key='', sep='_'):
def flatten(d, parent_key="", sep="_"):
    """
    Flatten a nested dictionary (dictionary of dictionaries) into a single level.

    Keys of nested mappings are joined to their parent key with *sep*; leaf values are kept
    unchanged. A nested mapping that is empty contributes no keys to the result.

    :param d: Multi-level dictionary to flatten.
    :param parent_key: Prefix prepended (followed by *sep*) to every key; empty for no prefix.
    :param sep: Separator token between parent and child keys.
    :return:
        A flattened 1-D dictionary with keys separated by *sep*.
    """
    items = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse, passing the accumulated key down as the prefix.
            items.update(flatten(v, new_key, sep=sep))
        else:
            # Keep the leaf value itself (previously every value was replaced by None).
            items[new_key] = v
    return items

Flattens a multidimensional dictionary (dictionary of dictionaries) to a single layer with child keys separated by "sep"

Parameters
  • d: Multi-level dictionary to flatten.
  • parent_key: Prepend a parent_key to all layers.
  • sep: Separator token between child and parent layers.
Returns
A flattened 1-D dictionary with keys separated by *sep*.
def remove_excess_whitespace(s):
def remove_excess_whitespace(s):
    """
    Normalise CRLF line breaks and collapse runs of spaces.

    A double CRLF becomes a single newline, each remaining CRLF becomes a space, and any
    run of two or more spaces is reduced to one space.

    :param s: The string to clean.
    :return: The cleaned string.
    """
    text = s.replace("\r\n\r\n", "\n").replace("\r\n", " ")

    # Halve runs of spaces until a pass no longer changes the text.
    while True:
        squeezed = text.replace("  ", " ")
        if squeezed == text:
            return text
        text = squeezed