Coverage for src / check_datapackage / extensions.py: 100%
68 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 14:17 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-02 14:17 +0000
1from collections.abc import Callable
2from dataclasses import dataclass
3from operator import itemgetter
4from typing import Any, Self, cast
6from jsonpath import JSONPath, compile
7from jsonpath.segments import JSONPathRecursiveDescentSegment
8from jsonpath.selectors import NameSelector
9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator
11from check_datapackage.internals import (
12 JsonPath,
13 PropertyField,
14 _filter,
15 _flat_map,
16 _get_direct_jsonpaths,
17 _get_fields_at_jsonpath,
18 _map,
19)
20from check_datapackage.issue import Issue
class CustomCheck(BaseModel, frozen=True):
    """A user-defined check to run against Data Package metadata.

    Attributes:
        jsonpath (str): Where the checked field or fields live, written in
            [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The text reported when a value fails the check.
        check (Callable[[Any], bool]): The function implementing the check.
            It receives the value found at the `jsonpath` location and returns
            true when the value passes, false when it does not.
        type (str): A label for the check (e.g., a JSON schema type such as
            "type", "pattern", or "format", or a custom type). It is carried
            on the resulting issues and can be used in an `Exclusion` object
            to exclude the check. Each custom check should have a unique
            `type`. The value "required" is rejected; use `RequiredCheck` for
            that purpose.

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        """Reject `type="required"`, which is reserved for `RequiredCheck`."""
        if value != "required":
            return value
        raise ValueError(
            "Cannot use `CustomCheck` with `type='required'`."
            " Use `RequiredCheck` to set properties as required instead."
        )

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Run the custom check against the given properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s built from the fields at `jsonpath` whose value
            fails `check`.
        """

        def _fails(field: PropertyField) -> bool:
            # `check` returns true for a valid value, so invert it to find
            # the violations.
            return not self.check(field.value)

        def _to_issue(field: PropertyField) -> Issue:
            return Issue(
                jsonpath=field.jsonpath, type=self.type, message=self.message
            )

        candidates: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath,
            properties,
        )
        violations: list[PropertyField] = _filter(candidates, _fails)
        return _map(violations, _to_issue)
@dataclass(frozen=True)
class TargetJsonPath:
    """The location of a single field targeted by a `RequiredCheck`.

    Attributes:
        parent (str): JSON path of the parent that should contain the field.
        field (str): Name of the targeted field within that parent.
    """

    parent: str
    field: str
def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    Args:
        jsonpath: The compiled JSON path to split into a parent path and the
            field name(s) selected by its last segment.

    Returns:
        The `TargetJsonPath`s built from the name selectors of the last
        segment, all sharing the same parent path. An empty list if
        `jsonpath` has no segments.

    Raises:
        ValueError: If `jsonpath` ends in the recursive descent (`..`)
            operator, or if its last segment contains a selector that is not
            a name selector.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    if not jsonpath.segments:
        return []

    # The full path text, used only in the error messages below.
    full_path = jsonpath.segments[0].token.path
    last_segment = jsonpath.segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if _filter(selectors, lambda selector: not isinstance(selector, NameSelector)):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # A segment can hold any number of name selectors, so the tuple type is
    # variadic rather than a 1-tuple.
    name_selectors = cast(tuple[NameSelector, ...], selectors)

    # The parent path is the same for every selector, so build it once. The
    # joined segment text is round-tripped through `compile` and `str` to
    # obtain the path string as the JSON path library renders it.
    parent = str(compile("".join(_map(jsonpath.segments[:-1], str))))
    return _map(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )
class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent/field pairs derived from `jsonpath`; filled in by the validator
    # below and read by `apply()`.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Derive the targets of the check from `jsonpath` and store them.

        Any `ValueError` raised by `_jsonpath_to_targets` for an unsupported
        path propagates from here.
        """
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Not a plain `JSONPath`: the compiled object exposes its first
            # sub-path as `path` and the remaining ones as the second item of
            # each entry in `paths`.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + _map(jsonpath.paths, itemgetter(1))

        # The model is frozen, so the attribute is written through
        # `object.__setattr__` rather than normal assignment.
        object.__setattr__(self, "_targets", _flat_map(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return _flat_map(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent path and field name to look for.
            matching_paths: The paths in `properties` that match the check's
                `jsonpath`, i.e., the fields that are present.
            properties: The properties to check.

        Returns:
            A list of `Issue`s of type "required" for the parents found in
            `properties` whose `target.field` path is not in `matching_paths`.
        """
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        # Build the lookup set once so that every membership test below is a
        # hash lookup instead of a scan over the whole list.
        present_paths = set(matching_paths)
        missing_paths = _filter(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}" not in present_paths,
        )
        return _map(
            missing_paths,
            lambda path: Issue(
                jsonpath=f"{path}.{target.field}",
                type="required",
                message=self.message,
            ),
        )
class Extensions(BaseModel, frozen=True):
    """Additional checks that go beyond the standard ones.

    This sub-item of `Config` holds extensions, i.e., extra checks that
    supplement those defined by the Data Package standard. Each attribute
    stores one kind of additional check, and the class can be expanded to
    include more types of extensions.

    Every extension class must provide its own `apply()` method that takes
    the `datapackage.json` properties `dict` as input and returns the list of
    `Issue`s found by that extension.

    Attributes:
        required_checks: A list of `RequiredCheck` objects defining properties
            to set as required.
        custom_checks: A list of `CustomCheck` objects defining extra, custom
            checks to run alongside the standard checks.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    # Both kinds of checks are optional and default to "none".
    # NOTE(review): the `[]` literals rely on pydantic copying mutable field
    # defaults per instance (see its docs on mutable default values).
    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []
def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Run every extension check against the properties.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions to apply to the properties.

    Returns:
        A list of `Issue`s.
    """
    # Required checks come first, followed by the custom checks.
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]

    def _run(extension: CustomCheck | RequiredCheck) -> list[Issue]:
        return extension.apply(properties)

    return _flat_map(all_checks, _run)