Coverage for src / check_datapackage / exclusion.py: 100%

45 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 14:17 +0000

1import re 

2from typing import Any, Optional 

3 

4from pydantic import BaseModel 

5 

6from check_datapackage.internals import ( 

7 JsonPath, 

8 _filter, 

9 _get_direct_jsonpaths, 

10 _map, 

11) 

12from check_datapackage.issue import Issue 

13 

14 

class Exclusion(BaseModel, frozen=True):
    r"""Describes issues that should be left out when checking properties.

    An `Exclusion` can select issues by location (`jsonpath`), by kind (`type`),
    or by both. When `jsonpath` and `type` are set on the same `Exclusion`, an
    issue is only excluded if it matches *both* of them (`AND` logic). To get
    `OR` logic instead, put several `Exclusion` objects in the `Config`.

    Attributes:
        jsonpath (Optional[str]): [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            pointing at the field or fields of the input object whose issues
            should be ignored. It is written in JSON path query syntax, e.g.,
            `$.resources[0].name`.
        type (Optional[str]): The type of check to exclude. This can be a JSON
            schema type such as "required", "type", "pattern", or "format", or
            a custom type.

    Examples:
        ```{python}
        import check_datapackage as cdp

        exclusion_required = cdp.Exclusion(type="required")
        exclusion_name = cdp.Exclusion(jsonpath="$.name")
        exclusion_desc_required = cdp.Exclusion(
            type="required",
            jsonpath="$.resources[*].description"
        )
        config = cdp.Config(
            exclusions=[
                exclusion_required,
                exclusion_name,
                exclusion_desc_required
            ]
        )
        ```
    """

    jsonpath: Optional[JsonPath] = None
    type: Optional[str] = None

52 

53 

def exclude(issues: list[Issue], exclusions: list[Exclusion]) -> list[Issue]:
    """Filter the issues, keeping those that no `Exclusion` object matches."""

    def _is_not_excluded(issue: Issue) -> bool:
        # An issue is kept only when none of the exclusions matches it.
        return not _get_any_matches(issue, exclusions)

    return _filter(issues, _is_not_excluded)

60 

61 

def _get_any_matches(issue: Issue, exclusions: list[Exclusion]) -> bool:
    """Tell whether at least one of the exclusions matches the issue."""

    def _matches_issue(exclusion: Exclusion) -> bool:
        return _get_matches(issue, exclusion)

    return any(_map(exclusions, _matches_issue))

67 

68 

69def _get_matches(issue: Issue, exclusion: Exclusion) -> bool: 

70 matches: list[bool] = [] 

71 

72 both_none = exclusion.jsonpath is None and exclusion.type is None 

73 if both_none: 

74 return False 

75 

76 if exclusion.jsonpath: 

77 matches.append(_jsonpaths_match(issue, exclusion.jsonpath)) 

78 

79 if exclusion.type: 

80 matches.append(_same_type(issue, exclusion.type)) 

81 

82 return all(matches) 

83 

84 

def _jsonpaths_match(issue: Issue, jsonpath: str) -> bool:
    """Check whether the given JSON path covers the location of the issue.

    A placeholder object is built from the issue's own JSON path, the given
    JSON path is passed to `_get_direct_jsonpaths` together with that object,
    and the issue's JSON path must be among the returned paths.
    """
    placeholder: dict[str, Any] = _get_json_object_from_jsonpath(issue.jsonpath)
    return issue.jsonpath in _get_direct_jsonpaths(jsonpath, placeholder)

89 

90 

91def _same_type(issue: Issue, type: str) -> bool: 

92 return type == issue.type 

93 

94 

def _get_json_object_from_jsonpath(jsonpath: str) -> dict[str, Any]:
    """Build an object that has a property at the given JSON path location.

    A leading `$.` is dropped and the rest is split on `.`; the resulting
    parts are turned into a nested object by `_get_object_from_path_parts`.
    """
    without_root = jsonpath.removeprefix("$.")
    segments = without_root.split(".")
    return _get_object_from_path_parts(segments)

99 

100 

def _get_object_from_path_parts(path_parts: list[str]) -> dict[str, Any]:
    """Recursively nest the path parts into a single object.

    The first part becomes the only key of the returned dict. Its value is
    built from the remaining parts, or is an empty dict when no parts remain.
    """
    head = path_parts[0]
    tail = path_parts[1:]
    nested: dict[str, Any] = _get_object_from_path_parts(tail) if tail else {}

    array_match = _get_array_parts(head)
    if array_match is None:
        # A plain field: the nested value is stored directly under its name.
        return {head: nested}

    # An array field such as `name[2]`: pad the array with empty objects so
    # that the nested value ends up as the last item, at the requested index.
    field_name, position = array_match.groups()
    padding: list[dict[str, Any]] = _map(range(int(position)), lambda _: {})
    return {field_name: padding + [nested]}

117 

118 

119def _get_array_parts(path_part: str) -> Optional[re.Match[str]]: 

120 """Extract the name and index from a JSON path part representing an array.""" 

121 return re.search(r"(\w+)\[(\d+)\]$", path_part)