Coverage for src / check_datapackage / extensions.py: 100%

76 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-21 12:38 +0000

1from collections.abc import Callable 

2from dataclasses import dataclass 

3from operator import itemgetter 

4from typing import Any, Self, cast 

5 

6from jsonpath import JSONPath, compile 

7from jsonpath.segments import JSONPathRecursiveDescentSegment 

8from jsonpath.selectors import NameSelector 

9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator 

10from seedcase_soil import flat_fmap, fmap, keep 

11 

12from check_datapackage.internals import ( 

13 JsonPath, 

14 PropertyField, 

15 _get_direct_jsonpaths, 

16 _get_fields_at_jsonpath, 

17) 

18from check_datapackage.issue import MISSING, Issue 

19 

# Error message used when a custom check is supplied as a plain `dict` instead
# of a `CustomCheck` object.
CUSTOM_CHECKS_CONFIG_ERROR = (
    "Custom checks cannot be configured in TOML because `check` must be a Python "
    "callable. Define CustomCheck extensions in Python instead."
)

24 

25 

class CustomCheck(BaseModel, frozen=True):
    """A custom check to be done on Data Package metadata.

    Attributes:
        jsonpath (str): The location of the field or fields the custom check applies to,
            expressed in [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The message shown when the check is violated.
        check (Callable[[Any], bool]): A function that expresses the custom check.
            It takes the value at the `jsonpath` location as input and
            returns true if the check is met, false if it isn't.
        type (str): The type of the custom check (e.g., a JSON schema type such as
            "type", "pattern", or "format", or a custom type). It will be shown in
            error messages and can be used in an `Exclusion` object to exclude the
            check. Each custom check should have a unique `type`. Defaults to
            `"custom"`. The value `"required"` is rejected during validation; use
            `RequiredCheck` to set properties as required instead.

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        """Reject `type="required"`, which is reserved for `RequiredCheck`."""
        if value == "required":
            raise ValueError(
                "Cannot use `CustomCheck` with `type='required'`."
                " Use `RequiredCheck` to set properties as required instead."
            )
        return value

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the custom check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, one for each field at `jsonpath` whose value
            does not pass `check`.
        """
        fields: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath,
            properties,
        )
        # Keep only the fields that violate the check; each becomes an issue.
        failing_fields: list[PropertyField] = keep(
            fields,
            lambda field: not self.check(field.value),
        )
        return fmap(
            failing_fields,
            lambda field: Issue(
                jsonpath=field.jsonpath, type=self.type, message=self.message
            ),
        )

99 

100 

@dataclass(frozen=True)
class TargetJsonPath:
    """The parent/field split of a JSON path that a `RequiredCheck` targets.

    Attributes:
        parent (str): JSON path pointing at the parent of the targeted field.
        field (str): Name of the targeted field within that parent.
    """

    parent: str
    field: str

112 

113 

def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    Each name selector in the last segment of the path becomes one target whose
    parent is the path formed by all preceding segments.

    Args:
        jsonpath: The compiled JSON path to split into parent and field parts.

    Returns:
        One `TargetJsonPath` per name selector in the last segment, or an empty
        list if the path has no segments.

    Raises:
        ValueError: If the path ends in the recursive descent (`..`) operator, or
            if its last segment contains a selector that is not a name selector.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    if not jsonpath.segments:
        return []

    full_path = jsonpath.segments[0].token.path
    last_segment = jsonpath.segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if any(not isinstance(selector, NameSelector) for selector in selectors):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # The parent path is identical for every selector, so it is round-tripped
    # through `compile` once here rather than once per selector.
    parent = str(compile("".join(fmap(jsonpath.segments[:-1], str))))
    name_selectors = cast(tuple[NameSelector, ...], selectors)
    return fmap(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )

144 

145 

class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Validation fails if a path in `jsonpath` ends in the recursive descent
    (`..`) operator or does not end in a name selector.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent/field pairs derived from `jsonpath` by the model validator below.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Derive the targets from `jsonpath`, rejecting unsupported paths."""
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Not a plain `JSONPath`: use its leading path plus the second item
            # of every entry in `paths`.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + fmap(jsonpath.paths, itemgetter(1))

        # NOTE(review): set via `object.__setattr__`, presumably because the
        # model is frozen — confirm before simplifying.
        object.__setattr__(self, "_targets", flat_fmap(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return flat_fmap(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent path and field name to look for.
            matching_paths: The paths in `properties` matched by `jsonpath`.
            properties: The properties to check.

        Returns:
            One `Issue` of type "required" for every parent matched by
            `target.parent` whose `target.field` path is not in `matching_paths`.
        """
        # Set membership avoids rescanning the whole list for every parent path.
        present_paths = frozenset(matching_paths)
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        # Build each expected field path once; it is used for both the lookup
        # and the resulting issue.
        expected_paths = fmap(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}",
        )
        missing_paths = keep(
            expected_paths,
            lambda path: path not in present_paths,
        )
        return fmap(
            missing_paths,
            lambda path: Issue(
                jsonpath=path,
                type="required",
                message=self.message,
                instance=MISSING,
            ),
        )

217 

218 

class Extensions(BaseModel, frozen=True):
    """Additional checks that supplement the standard checks.

    `Extensions` is a sub-item of `Config`. It groups the checks that go beyond
    those specified by the Data Package standard, with one sub-item per kind of
    extension. More kinds of extensions can be added to this class over time.

    Every extension class has to provide its own `apply()` method, which takes
    the `datapackage.json` properties `dict` and returns the list of `Issue`s
    that the extension found.

    Attributes:
        required_checks: A list of `RequiredCheck` objects defining properties
            to set as required.
        custom_checks: A list of `CustomCheck` objects defining extra, custom
            checks to run alongside the standard checks.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []

    @field_validator("custom_checks", mode="before")
    @classmethod
    def _reject_config_custom_checks(cls, value: Any) -> Any:
        """Refuse custom checks that are given as plain dicts."""
        # Anything that is not a list is passed through unchanged.
        if not isinstance(value, list):
            return value
        for entry in value:
            if isinstance(entry, dict):
                raise ValueError(CUSTOM_CHECKS_CONFIG_ERROR)
        return value

274 

275 

def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Run every extension check against the properties.

    Required checks are applied first, followed by custom checks.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions to apply to the properties.

    Returns:
        A list of `Issue`s collected from all extension checks.
    """
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]
    return flat_fmap(all_checks, lambda check: check.apply(properties))