Coverage for src / check_datapackage / extensions.py: 100%
76 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 12:38 +0000
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-21 12:38 +0000
1from collections.abc import Callable
2from dataclasses import dataclass
3from operator import itemgetter
4from typing import Any, Self, cast
6from jsonpath import JSONPath, compile
7from jsonpath.segments import JSONPathRecursiveDescentSegment
8from jsonpath.selectors import NameSelector
9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator
10from seedcase_soil import flat_fmap, fmap, keep
12from check_datapackage.internals import (
13 JsonPath,
14 PropertyField,
15 _get_direct_jsonpaths,
16 _get_fields_at_jsonpath,
17)
18from check_datapackage.issue import MISSING, Issue
# Message raised by `Extensions` when an entry of `custom_checks` arrives as a
# plain mapping instead of a `CustomCheck` object.
CUSTOM_CHECKS_CONFIG_ERROR = (
    "Custom checks cannot be configured in TOML "
    "because `check` must be a Python callable. "
    "Define CustomCheck extensions in Python instead."
)
class CustomCheck(BaseModel, frozen=True):
    """A user-defined check that is run against Data Package metadata.

    Attributes:
        jsonpath (str): Where the checked field or fields live, written in
            [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The text reported when a value violates the check.
        check (Callable[[Any], bool]): The function implementing the check.
            It receives the value found at the `jsonpath` location and
            returns true when the value is acceptable, false otherwise.
        type (str): The type of the custom check (e.g., a JSON schema type such
            as "type", "pattern", or "format", or a custom type). It appears in
            error messages and can be referenced by an `Exclusion` object to
            exclude the check. Each custom check should have a unique `type`.
            The value "required" is rejected; use `RequiredCheck` for that.

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        # Any type other than "required" passes through untouched; "required"
        # is the type emitted by `RequiredCheck`, so it is refused here.
        if value != "required":
            return value
        raise ValueError(
            "Cannot use `CustomCheck` with `type='required'`."
            " Use `RequiredCheck` to set properties as required instead."
        )

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Run the custom check against the given properties.

        Args:
            properties: The properties to check.

        Returns:
            A list with one `Issue` for every field located at `jsonpath`
            whose value makes `check` return a falsy result.
        """
        candidates: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath, properties
        )
        return [
            Issue(jsonpath=candidate.jsonpath, type=self.type, message=self.message)
            for candidate in candidates
            if not self.check(candidate.value)
        ]
@dataclass(frozen=True)
class TargetJsonPath:
    """The location a `RequiredCheck` points at, split into parent and field.

    Attributes:
        parent (str): JSON path of the object that should contain the field.
        field (str): Name of the field expected under `parent`.
    """

    parent: str
    field: str
def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    Args:
        jsonpath: The compiled JSON path to split into parent/field targets.

    Returns:
        One `TargetJsonPath` per name selector in the last segment of
        `jsonpath`, or an empty list when the path has no segments.

    Raises:
        ValueError: If the path ends in the recursive descent (`..`) operator,
            or if its last segment contains a selector that is not a name
            selector.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    if not jsonpath.segments:
        return []

    full_path = jsonpath.segments[0].token.path
    last_segment = jsonpath.segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if any(not isinstance(selector, NameSelector) for selector in selectors):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # Every target shares the same parent, so round-trip it through `compile`
    # once here instead of once per selector.
    parent = str(compile("".join(fmap(jsonpath.segments[:-1], str))))
    # The last segment may hold several name selectors, hence the variadic tuple.
    name_selectors = cast(tuple[NameSelector, ...], selectors)
    return fmap(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )
class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent/field pairs derived from `jsonpath`; populated by the model validator.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Derive `_targets` from `jsonpath`.

        Any `ValueError` raised by `_jsonpath_to_targets` propagates from here.
        """
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Not a plain `JSONPath`: the compiled object exposes a first `path`
            # plus `paths`, whose items carry a further path at index 1.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + fmap(jsonpath.paths, itemgetter(1))

        # NOTE(review): assigned via `object.__setattr__`, presumably to bypass
        # the frozen model's own attribute handling -- confirm before changing.
        object.__setattr__(self, "_targets", flat_fmap(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, one for every parent location in `properties`
            that lacks the required field.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return flat_fmap(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent/field pair to look for.
            matching_paths: The direct JSON paths at which the required field
                was found in `properties`.
            properties: The properties to check.

        Returns:
            One `Issue` of type "required" for each direct parent path whose
            expected `<parent>.<field>` path is not among `matching_paths`.
        """
        # Build the lookup once so each membership test below is O(1) instead
        # of a scan over the whole list for every parent path.
        present_paths = set(matching_paths)
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        missing_paths = keep(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}" not in present_paths,
        )
        return fmap(
            missing_paths,
            lambda path: Issue(
                jsonpath=f"{path}.{target.field}",
                type="required",
                message=self.message,
                instance=MISSING,
            ),
        )
class Extensions(BaseModel, frozen=True):
    """Additional checks that go beyond the Data Package standard.

    This sub-item of `Config` groups extensions, i.e., extra checks that
    supplement the ones specified by the Data Package standard. Each kind of
    extension is stored in its own sub-item, and the class can be expanded
    to hold more kinds of extensions.

    Each extension class must implement its own `apply()` method that takes
    the `datapackage.json` properties `dict` as input and outputs an `Issue`
    list that contains the issues found by that extension.

    Attributes:
        required_checks: A list of `RequiredCheck` objects defining properties
            to set as required.
        custom_checks: A list of `CustomCheck` objects defining extra, custom
            checks to run alongside the standard checks.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []

    @field_validator("custom_checks", mode="before")
    @classmethod
    def _reject_config_custom_checks(cls, value: Any) -> Any:
        # Only lists are inspected; anything else is handed on unchanged for
        # the regular field validation to deal with.
        if not isinstance(value, list):
            return value
        # A `dict` entry means the check was written as plain data rather than
        # built as a `CustomCheck` object, which is refused.
        for item in value:
            if isinstance(item, dict):
                raise ValueError(CUSTOM_CHECKS_CONFIG_ERROR)
        return value
def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Run every extension check against the properties.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions to apply to the properties.

    Returns:
        A single flat list of `Issue`s: those from the required checks first,
        followed by those from the custom checks.
    """
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]
    return [issue for check in all_checks for issue in check.apply(properties)]