debeir.datasets.types

 1import string
 2from collections import defaultdict
 3from enum import Enum
 4from typing import List, Union
 5
 6
 7class InputExample:
 8    """
 9    Copied from Sentence Transformer Library
10    Structure for one input example with texts, the label and a unique id
11    """
12
13    def __init__(self, guid: str = '', texts: List[str] = None, label: Union[int, float] = 0):
14        """
15        Creates one InputExample with the given texts, guid and label
16
17        :param guid
18            id for the example
19        :param texts
20            the texts for the example. Note, str.strip() is called on the texts
21        :param label
22            the label for the example
23        """
24        self.guid = guid
25        self.texts = [text.strip() for text in texts]
26        self.label = label
27
28    def __str__(self):
29        return "<InputExample> label: {}, texts: {}".format(str(self.label), "; ".join(self.texts))
30
31    def get_label(self):
32        return self.label
33
34    # def __getattr__(self, key):
35    #    if key == "label":
36    #        return self.get_label()
37
38    #    if key == "texts":
39    #        return self.texts
40
41    #    if key in ["guid", "id"]:
42    #        return self.guid
43
44    #    raise KeyError()
45
46    @classmethod
47    def to_dict(cls, data: List['InputExample']):
48        text_len = len(data[0].texts)
49        processed_data = defaultdict(lambda: [])
50
51        for datum in data:
52            # string.ascii_lowercase
53
54            processed_data["id"].append(datum.guid)
55            processed_data["label"].append(datum.get_label())
56
57            for i in range(text_len):
58                letter = string.ascii_lowercase[i]  # abcdefghi
59                # processed_data[text_a] = ...
60                processed_data[f"text_{letter}"].append(datum.texts[i])
61
62        return processed_data
63
64    @classmethod
65    def from_parser_output(cls, data):
66        pass
67
68
class RelevanceExample(InputExample):
    """
    InputExample variant that reports its label divided by a maximum score.
    """

    def __init__(self, max_score=2, *args, **kwargs):
        """
        :param max_score
            the divisor applied to the stored label by relevance()
        :param args
            positional arguments passed on to InputExample
        :param kwargs
            keyword arguments passed on to InputExample
        """
        super().__init__(*args, **kwargs)
        self.max_score = max_score

    def relevance(self):
        """
        :return:
            the stored label divided by max_score
        """
        normalised = self.label / self.max_score
        return normalised

    def get_label(self):
        """
        :return:
            the normalised relevance instead of the raw label
        """
        return self.relevance()
87
88
class DatasetTypes(Enum):
    """
    A collection of common dataset types that is usable in the library.
    """
    # Members are created by assignment; a bare annotation ("Name: value")
    # does not define an Enum member.
    List = "List"
    ListInputExample = "ListInputExample"
    ListDict = "ListDict"
    HuggingfaceDataset = "HuggingfaceDataset"
class InputExample:
 8class InputExample:
 9    """
10    Copied from Sentence Transformer Library
11    Structure for one input example with texts, the label and a unique id
12    """
13
14    def __init__(self, guid: str = '', texts: List[str] = None, label: Union[int, float] = 0):
15        """
16        Creates one InputExample with the given texts, guid and label
17
18        :param guid
19            id for the example
20        :param texts
21            the texts for the example. Note, str.strip() is called on the texts
22        :param label
23            the label for the example
24        """
25        self.guid = guid
26        self.texts = [text.strip() for text in texts]
27        self.label = label
28
29    def __str__(self):
30        return "<InputExample> label: {}, texts: {}".format(str(self.label), "; ".join(self.texts))
31
32    def get_label(self):
33        return self.label
34
35    # def __getattr__(self, key):
36    #    if key == "label":
37    #        return self.get_label()
38
39    #    if key == "texts":
40    #        return self.texts
41
42    #    if key in ["guid", "id"]:
43    #        return self.guid
44
45    #    raise KeyError()
46
47    @classmethod
48    def to_dict(cls, data: List['InputExample']):
49        text_len = len(data[0].texts)
50        processed_data = defaultdict(lambda: [])
51
52        for datum in data:
53            # string.ascii_lowercase
54
55            processed_data["id"].append(datum.guid)
56            processed_data["label"].append(datum.get_label())
57
58            for i in range(text_len):
59                letter = string.ascii_lowercase[i]  # abcdefghi
60                # processed_data[text_a] = ...
61                processed_data[f"text_{letter}"].append(datum.texts[i])
62
63        return processed_data
64
65    @classmethod
66    def from_parser_output(cls, data):
67        pass

Copied from the Sentence Transformers library. Structure for one input example with texts, the label and a unique id.

InputExample( guid: str = '', texts: List[str] = None, label: Union[int, float] = 0)
14    def __init__(self, guid: str = '', texts: List[str] = None, label: Union[int, float] = 0):
15        """
16        Creates one InputExample with the given texts, guid and label
17
18        :param guid
19            id for the example
20        :param texts
21            the texts for the example. Note, str.strip() is called on the texts
22        :param label
23            the label for the example
24        """
25        self.guid = guid
26        self.texts = [text.strip() for text in texts]
27        self.label = label

Creates one InputExample with the given texts, guid and label

:param guid: id for the example. :param texts: the texts for the example (note: str.strip() is called on each text). :param label: the label for the example.

def get_label(self):
32    def get_label(self):
33        return self.label
@classmethod
def to_dict(cls, data: List[debeir.datasets.types.InputExample]):
47    @classmethod
48    def to_dict(cls, data: List['InputExample']):
49        text_len = len(data[0].texts)
50        processed_data = defaultdict(lambda: [])
51
52        for datum in data:
53            # string.ascii_lowercase
54
55            processed_data["id"].append(datum.guid)
56            processed_data["label"].append(datum.get_label())
57
58            for i in range(text_len):
59                letter = string.ascii_lowercase[i]  # abcdefghi
60                # processed_data[text_a] = ...
61                processed_data[f"text_{letter}"].append(datum.texts[i])
62
63        return processed_data
@classmethod
def from_parser_output(cls, data):
65    @classmethod
66    def from_parser_output(cls, data):
67        pass
class RelevanceExample(InputExample):
class RelevanceExample(InputExample):
    """
    InputExample variant that reports its label divided by a maximum score.
    """

    def __init__(self, max_score=2, *args, **kwargs):
        """
        :param max_score
            the divisor applied to the stored label by relevance()
        :param args
            positional arguments passed on to InputExample
        :param kwargs
            keyword arguments passed on to InputExample
        """
        super().__init__(*args, **kwargs)
        self.max_score = max_score

    def relevance(self):
        """
        :return:
            the stored label divided by max_score
        """
        normalised = self.label / self.max_score
        return normalised

    def get_label(self):
        """
        :return:
            the normalised relevance instead of the raw label
        """
        return self.relevance()

Converts Relevance Labels to 0 - 1

RelevanceExample(max_score=2, *args, **kwargs)
75    def __init__(self, max_score=2, *args, **kwargs):
76        super().__init__(*args, **kwargs)
77        self.max_score = max_score

Creates one InputExample with the given texts, guid and label

:param guid: id for the example. :param texts: the texts for the example (note: str.strip() is called on each text). :param label: the label for the example.

def get_label(self):
79    def get_label(self):
80        return self.relevance()
def relevance(self):
82    def relevance(self):
83        """
84        :return:
85            Returns a normalised score for relevance between 0 - 1
86        """
87        return self.label / self.max_score
Returns
Returns a normalised score for relevance between 0 - 1
class DatasetTypes(enum.Enum):
class DatasetTypes(Enum):
    """
    A collection of common dataset types that is usable in the library.
    """
    # Members are created by assignment; a bare annotation ("Name: value")
    # does not define an Enum member.
    List = "List"
    ListInputExample = "ListInputExample"
    ListDict = "ListDict"
    HuggingfaceDataset = "HuggingfaceDataset"

A collection of common dataset types that is usable in the library.

Inherited Members
enum.Enum
name
value