Coverage for src/check_datapackage/extensions.py: 100%

68 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-02 14:17 +0000

1from collections.abc import Callable 

2from dataclasses import dataclass 

3from operator import itemgetter 

4from typing import Any, Self, cast 

5 

6from jsonpath import JSONPath, compile 

7from jsonpath.segments import JSONPathRecursiveDescentSegment 

8from jsonpath.selectors import NameSelector 

9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator 

10 

11from check_datapackage.internals import ( 

12 JsonPath, 

13 PropertyField, 

14 _filter, 

15 _flat_map, 

16 _get_direct_jsonpaths, 

17 _get_fields_at_jsonpath, 

18 _map, 

19) 

20from check_datapackage.issue import Issue 

21 

22 

class CustomCheck(BaseModel, frozen=True):
    """A custom check to be done on Data Package metadata.

    Attributes:
        jsonpath (str): The location of the field or fields the custom check
            applies to, expressed in
            [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The message shown when the check is violated.
        check (Callable[[Any], bool]): A function that expresses the custom check.
            It takes the value at the `jsonpath` location as input and
            returns true if the check is met, false if it isn't.
        type (str): The type of the custom check (e.g., a JSON schema type such as
            "type", "pattern", or "format", or a custom type). It will be
            shown in error messages and can be used in an `Exclusion` object to
            exclude the check. Each custom check should have a unique `type`.
            Defaults to "custom". The value "required" is rejected; use
            `RequiredCheck` to set properties as required instead.

    Raises:
        ValueError: If `type` is "required".

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        """Reject `type="required"` and return any other value unchanged."""
        if value != "required":
            return value
        raise ValueError(
            "Cannot use `CustomCheck` with `type='required'`."
            " Use `RequiredCheck` to set properties as required instead."
        )

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the custom check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, one for each field found at `jsonpath` whose
            value does not satisfy `check`.
        """

        def _violates_check(field: PropertyField) -> bool:
            return not self.check(field.value)

        def _to_issue(field: PropertyField) -> Issue:
            return Issue(
                jsonpath=field.jsonpath, type=self.type, message=self.message
            )

        fields: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath,
            properties,
        )
        return _map(_filter(fields, _violates_check), _to_issue)

96 

97 

@dataclass(frozen=True)
class TargetJsonPath:
    """A single field that a `RequiredCheck` targets, split into parent and name.

    Attributes:
        parent (str): JSON path pointing at the parent of the targeted field.
        field (str): Name of the targeted field itself.
    """

    parent: str
    field: str

109 

110 

def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    The selectors of the last segment give the targeted field names; the
    segments before it form the parent path shared by all of those fields.

    Args:
        jsonpath: The compiled JSON path to split into parent and field names.

    Returns:
        One `TargetJsonPath` per selector in the last segment, or an empty list
        if the path has no segments.

    Raises:
        ValueError: If the last segment is a recursive descent (`..`) segment,
            or if any selector in the last segment is not a name selector.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    segments = jsonpath.segments
    if not segments:
        return []

    full_path = segments[0].token.path
    last_segment = segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if not all(isinstance(selector, NameSelector) for selector in selectors):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # The parent is the same for every selector, so compile it only once
    # instead of once per selector.
    parent = str(compile("".join(_map(segments[:-1], str))))
    # A segment can hold any number of selectors, hence the variadic tuple type.
    name_selectors = cast(tuple[NameSelector, ...], selectors)
    return _map(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )

141 

142 

class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`).
        message (str): The message that is shown when the check fails.

    Raises:
        ValueError: If `jsonpath` ends in the recursive descent (`..`) operator
            or doesn't end in a name selector.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent/field pairs derived from `jsonpath`; filled in by the model
    # validator below rather than by the caller.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Compile `jsonpath` and store the targets derived from it."""
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Otherwise the compiled object carries a first path in `.path` and
            # further paths as the second item of each entry in `.paths`.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + _map(jsonpath.paths, itemgetter(1))

        # NOTE(review): presumably set via `object.__setattr__` because the
        # model is frozen -- confirm.
        object.__setattr__(self, "_targets", _flat_map(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return _flat_map(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent path and field name to look for.
            matching_paths: The paths in `properties` that match `jsonpath`.
            properties: The properties to check.

        Returns:
            One `Issue` of type "required" for each parent path whose expected
            field path is not among `matching_paths`.
        """
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        # Build the set once so each membership test below is O(1) instead of
        # scanning the whole list for every parent path.
        present_paths = set(matching_paths)
        # Compute each expected field path once and reuse it for both the
        # membership test and the resulting issue.
        expected_paths = _map(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}",
        )
        missing_paths = _filter(
            expected_paths,
            lambda path: path not in present_paths,
        )
        return _map(
            missing_paths,
            lambda path: Issue(
                jsonpath=path,
                type="required",
                message=self.message,
            ),
        )

213 

214 

class Extensions(BaseModel, frozen=True):
    """Additional checks that go beyond the standard ones.

    `Extensions` is a sub-item of `Config`. It holds checks that supplement
    those specified by the Data Package standard, grouped into sub-items by
    kind of check. More kinds of extensions can be added to this class later.

    Every extension class is expected to provide its own `apply()` method,
    which receives the `datapackage.json` properties as a `dict` and returns
    the list of `Issue`s that the extension found.

    Attributes:
        required_checks: `RequiredCheck` objects, each marking properties
            as required.
        custom_checks: `CustomCheck` objects, each describing an extra check
            to run in addition to the standard checks.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []

263 

264 

def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Run every extension check against the properties.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions whose checks are run.

    Returns:
        A list of `Issue`s.
    """

    def _run_check(extension: CustomCheck | RequiredCheck) -> list[Issue]:
        return extension.apply(properties)

    # Required checks are listed first, followed by the custom checks.
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]
    return _flat_map(all_checks, _run_check)