Coverage for src / check_datapackage / exclusion.py: 100%

46 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-30 13:13 +0000

1import re 

2from typing import Any, Optional 

3 

4from pydantic import BaseModel 

5from seedcase_soil import fmap, keep 

6 

7from check_datapackage.internals import JsonPath, _get_direct_jsonpaths 

8from check_datapackage.issue import Issue 

9 

10 

class Exclusion(BaseModel, frozen=True):
    r"""A check to be excluded when checking properties.

    When you use both `jsonpath` and `type` in the same `Exclusion` object, only issues
    that match *both* will be excluded, meaning it is an `AND` logic. If you want `OR`
    logic, use multiple `Exclusion` objects in the `Config`. An `Exclusion` with
    neither `jsonpath` nor `type` set excludes nothing.

    Attributes:
        jsonpath (Optional[str]): [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            to the field or fields in the input object where issues should be ignored.
            Uses JSON path syntax for queries, e.g., `$.resources[0].name`, to ignore
            issues related to that path.
        type (Optional[str]): The type of check to exclude (e.g., a JSON schema type
            such as "required", "type", "pattern", or "format", or a custom type).

    Examples:
        ```{python}
        import check_datapackage as cdp

        exclusion_required = cdp.Exclusion(type="required")
        exclusion_name = cdp.Exclusion(jsonpath="$.name")
        exclusion_desc_required = cdp.Exclusion(
            type="required",
            jsonpath="$.resources[*].description"
        )
        config = cdp.Config(
            exclusions=[
                exclusion_required,
                exclusion_name,
                exclusion_desc_required
            ]
        )
        ```
    """

    # Both criteria are optional and default to None, meaning "not restricted by
    # this criterion".
    jsonpath: Optional[JsonPath] = None
    type: Optional[str] = None

48 

49 

def exclude(issues: list[Issue], exclusions: list[Exclusion]) -> list[Issue]:
    """Drop every issue that is matched by at least one exclusion.

    Args:
        issues: The issues to filter.
        exclusions: The exclusions to test each issue against.

    Returns:
        The issues for which no exclusion matched.
    """

    def is_not_excluded(issue: Issue) -> bool:
        return not _get_any_matches(issue, exclusions)

    return keep(issues, is_not_excluded)

56 

57 

def _get_any_matches(issue: Issue, exclusions: list[Exclusion]) -> bool:
    """Tell whether at least one of the exclusions matches the issue."""

    def matches_issue(exclusion: Exclusion) -> bool:
        return _get_matches(issue, exclusion)

    return any(fmap(exclusions, matches_issue))

63 

64 

def _get_matches(issue: Issue, exclusion: Exclusion) -> bool:
    """Tell whether a single exclusion applies to an issue.

    Every criterion set on the exclusion (`jsonpath`, `type`) must match the
    issue (`AND` logic). An exclusion without any usable criterion never
    matches.

    Args:
        issue: The issue to test.
        exclusion: The exclusion whose criteria are checked against the issue.

    Returns:
        True if the exclusion has at least one criterion and all of its
        criteria match the issue, False otherwise.
    """
    criteria: list[bool] = []

    if exclusion.jsonpath:
        criteria.append(_jsonpaths_match(issue, exclusion.jsonpath))

    if exclusion.type:
        criteria.append(_same_type(issue, exclusion.type))

    # `all([])` is True, so an exclusion whose criteria are all unset or empty
    # (e.g. `type=""`) would otherwise match, and thereby exclude, every issue.
    return bool(criteria) and all(criteria)

79 

80 

def _jsonpaths_match(issue: Issue, jsonpath: str) -> bool:
    """Tell whether the exclusion's JSON path covers the issue's JSON path.

    A minimal object containing only the issue's location is built, the
    exclusion's JSON path is resolved against it with `_get_direct_jsonpaths`,
    and the issue's own path is looked up in the result.

    Args:
        issue: The issue whose `jsonpath` is tested.
        jsonpath: The JSON path taken from the exclusion.

    Returns:
        True if the issue's JSON path is among the resolved paths.
    """
    skeleton: dict[str, Any] = _get_json_object_from_jsonpath(issue.jsonpath)
    return issue.jsonpath in _get_direct_jsonpaths(jsonpath, skeleton)

85 

86 

def _same_type(issue: Issue, type: str) -> bool:
    """Tell whether the given check type equals the issue's type."""
    is_same = type == issue.type
    return is_same

89 

90 

def _get_json_object_from_jsonpath(jsonpath: str) -> dict[str, Any]:
    """Build an object that has a property at the given JSON path location.

    The leading `$.` is stripped and the remainder is split on `.`; the
    resulting segments are turned into nested objects by
    `_get_object_from_path_parts`.

    Args:
        jsonpath: A concrete JSON path such as `$.resources[0].name`.

    Returns:
        A nested object containing the location described by the path.
    """
    without_root = jsonpath.removeprefix("$.")
    segments = without_root.split(".")
    return _get_object_from_path_parts(segments)

95 

96 

def _get_object_from_path_parts(path_parts: list[str]) -> dict[str, Any]:
    """Recursively nest the path segments into a single object.

    Args:
        path_parts: JSON path segments, outermost first, e.g.
            `["resources[1]", "name"]`. Must not be empty.

    Returns:
        An object with the first segment as its only key. A plain segment maps
        to the object built from the remaining segments; an array segment
        `name[i]` maps `name` to a list of `i` empty objects followed by the
        object built from the remaining segments.
    """
    head = path_parts[0]
    tail = path_parts[1:]
    child: dict[str, Any] = _get_object_from_path_parts(tail) if tail else {}

    array_match = _get_array_parts(head)
    if array_match is None:
        # Plain property: the nested object becomes its value.
        return {head: child}

    # Array item: pad with empty objects so that the nested object ends up at
    # the requested index, i.e. as the last item of the list.
    name, index = array_match.groups()
    padding: list[dict[str, Any]] = fmap(range(int(index)), lambda _: {})
    return {name: padding + [child]}

113 

114 

def _get_array_parts(path_part: str) -> Optional[re.Match[str]]:
    """Split an array-style JSON path segment into its name and index.

    Args:
        path_part: One segment of a JSON path, e.g. `resources[2]` or `name`.

    Returns:
        A match whose groups are the name and the (string) index if the
        segment ends in `name[digits]`, otherwise None.
    """
    array_pattern = re.compile(r"(\w+)\[(\d+)\]$")
    return array_pattern.search(path_part)