Coverage for src / check_datapackage / extensions.py: 100%
69 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-30 13:13 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-30 13:13 +0000
1from collections.abc import Callable
2from dataclasses import dataclass
3from operator import itemgetter
4from typing import Any, Self, cast
6from jsonpath import JSONPath, compile
7from jsonpath.segments import JSONPathRecursiveDescentSegment
8from jsonpath.selectors import NameSelector
9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator
10from seedcase_soil import flat_fmap, fmap, keep
12from check_datapackage.internals import (
13 JsonPath,
14 PropertyField,
15 _get_direct_jsonpaths,
16 _get_fields_at_jsonpath,
17)
18from check_datapackage.issue import MISSING, Issue
class CustomCheck(BaseModel, frozen=True):
    """A custom check to be done on Data Package metadata.

    Attributes:
        jsonpath (str): The location of the field or fields the custom check applies to,
            expressed in [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The message shown when the check is violated.
        check (Callable[[Any], bool]): A function that expresses the custom check.
            It takes the value at the `jsonpath` location as input and
            returns true if the check is met, false if it isn't.
        type (str): The type of the custom check (e.g., a JSON schema type such as
            "required", "type", "pattern", or "format", or a custom type). It will be
            shown in error messages and can be used in an `Exclusion` object to exclude
            the check. Each custom check should have a unique `type`.

    Raises:
        ValueError: If `type` is `"required"`; `RequiredCheck` must be used for
            that purpose instead.

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        """Reject `type="required"`, which is reserved for `RequiredCheck`."""
        if value == "required":
            raise ValueError(
                "Cannot use `CustomCheck` with `type='required'`."
                " Use `RequiredCheck` to set properties as required instead."
            )
        return value

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the custom check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, built from the fields found at `jsonpath` whose
            value does not satisfy `check`. Each issue carries the field's own
            JSON path together with this check's `type` and `message`.
        """
        fields: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath,
            properties,
        )
        # Only the fields for which `check` reports a violation become issues.
        failing_fields: list[PropertyField] = keep(
            fields,
            lambda field: not self.check(field.value),
        )
        return fmap(
            failing_fields,
            lambda field: Issue(
                jsonpath=field.jsonpath,
                type=self.type,
                message=self.message,
            ),
        )
@dataclass(frozen=True)
class TargetJsonPath:
    """A JSON path targeted by a `RequiredCheck`.

    The path is stored in two parts so that the parent object can be looked up
    separately from the field that is expected to exist on it.

    Attributes:
        parent (str): The JSON path to the parent of the targeted field.
        field (str): The name of the targeted field.
    """

    parent: str
    field: str
def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    Args:
        jsonpath: The compiled JSON path to split into parent path and field
            name(s).

    Returns:
        One `TargetJsonPath` per name selector in the last segment of
        `jsonpath`, all sharing the same parent path. An empty list if the
        path has no segments.

    Raises:
        ValueError: If the path ends in the recursive descent (`..`) operator,
            or if its last segment contains anything other than name
            selectors.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    if not jsonpath.segments:
        return []

    full_path = jsonpath.segments[0].token.path
    last_segment = jsonpath.segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if any(not isinstance(selector, NameSelector) for selector in selectors):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # The parent path is identical for every selector, so it is re-compiled
    # and rendered once rather than once per selector.
    parent = str(compile("".join(fmap(jsonpath.segments[:-1], str))))
    # Every selector was verified to be a `NameSelector` above; the cast only
    # informs the type checker (variable-length tuple, not a 1-tuple).
    name_selectors = cast(tuple[NameSelector, ...], selectors)
    return fmap(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )
class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Raises:
        ValueError: If `jsonpath` ends in the recursive descent (`..`) operator
            or does not end in a name selector.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent-path/field pairs derived from `jsonpath` during validation.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Validate `jsonpath` and precompute the targets it refers to."""
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Anything else is treated as a compound path: `path` holds the
            # first path and `paths` holds pairs whose second item is a path.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + fmap(jsonpath.paths, itemgetter(1))

        # NOTE(review): `object.__setattr__` is presumably used to sidestep the
        # model's own attribute assignment on this frozen model - confirm.
        object.__setattr__(self, "_targets", flat_fmap(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return flat_fmap(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent path and field name to look for.
            matching_paths: The JSON paths that `jsonpath` matched in
                `properties`.
            properties: The properties to check.

        Returns:
            One "required" `Issue` for each parent path whose expected field
            path is not among `matching_paths`.
        """
        # Build the lookup once so each membership test is a hash lookup
        # instead of a scan over the whole list.
        present_paths = set(matching_paths)
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        missing_paths = keep(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}" not in present_paths,
        )
        return fmap(
            missing_paths,
            lambda path: Issue(
                jsonpath=f"{path}.{target.field}",
                type="required",
                message=self.message,
                instance=MISSING,
            ),
        )
class Extensions(BaseModel, frozen=True):
    """Extensions to the standard checks.

    This sub-item of `Config` defines extensions, i.e., additional checks
    that supplement those specified by the Data Package standard. It
    contains sub-items that store additional checks. This `Extensions` class
    can be expanded to include more types of extensions.

    Each extension class must implement its own `apply()` method that takes
    the `datapackage.json` properties `dict` as input and outputs an `Issue`
    list that contains the issues found by that extension.

    Attributes:
        required_checks: A list of `RequiredCheck` objects defining properties
            to set as required.
        custom_checks: A list of `CustomCheck` objects defining extra, custom
            checks to run alongside the standard checks.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    # NOTE(review): these are pydantic fields, so the list defaults are
    # expected to be copied per instance rather than shared - confirm against
    # the pydantic version in use.
    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []
def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Applies the extension checks to the properties.

    Required checks are applied first, followed by custom checks, and the
    issues from every check are combined into a single flat list.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions to apply to the properties.

    Returns:
        A list of `Issue`s.
    """
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]
    return flat_fmap(
        all_checks,
        lambda extension: extension.apply(properties),
    )