Coverage for src / check_datapackage / extensions.py: 100%

69 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-30 13:13 +0000

1from collections.abc import Callable 

2from dataclasses import dataclass 

3from operator import itemgetter 

4from typing import Any, Self, cast 

5 

6from jsonpath import JSONPath, compile 

7from jsonpath.segments import JSONPathRecursiveDescentSegment 

8from jsonpath.selectors import NameSelector 

9from pydantic import BaseModel, PrivateAttr, field_validator, model_validator 

10from seedcase_soil import flat_fmap, fmap, keep 

11 

12from check_datapackage.internals import ( 

13 JsonPath, 

14 PropertyField, 

15 _get_direct_jsonpaths, 

16 _get_fields_at_jsonpath, 

17) 

18from check_datapackage.issue import MISSING, Issue 

19 

20 

class CustomCheck(BaseModel, frozen=True):
    """A custom check to be done on Data Package metadata.

    Attributes:
        jsonpath (str): The location of the field or fields the custom check
            applies to, expressed in
            [JSON path](https://jg-rp.github.io/python-jsonpath/syntax/)
            notation (e.g., `$.resources[*].name`).
        message (str): The message shown when the check is violated.
        check (Callable[[Any], bool]): A function that expresses the custom check.
            It takes the value at the `jsonpath` location as input and
            returns true if the check is met, false if it isn't.
        type (str): The type of the custom check (e.g., a JSON schema type such as
            "type", "pattern", or "format", or a custom type). It will be
            shown in error messages and can be used in an `Exclusion` object to
            exclude the check. Each custom check should have a unique `type`.
            Defaults to "custom". The type "required" is rejected during
            validation; use `RequiredCheck` to set properties as required.

    Examples:
        ```{python}
        import check_datapackage as cdp

        license_check = cdp.CustomCheck(
            type="only-mit",
            jsonpath="$.licenses[*].name",
            message="Data Packages may only be licensed under MIT.",
            check=lambda license_name: license_name == "mit",
        )
        config = cdp.Config(
            extensions=cdp.Extensions(
                custom_checks=[license_check]
            )
        )
        cdp.check(cdp.example_package_properties(), config=config)
        ```
    """

    jsonpath: JsonPath
    message: str
    check: Callable[[Any], bool]
    type: str = "custom"

    @field_validator("type", mode="after")
    @classmethod
    def _check_not_required(cls, value: str) -> str:
        """Reject `type="required"`, which is reserved for `RequiredCheck`.

        Args:
            value: The value given for `type`.

        Returns:
            The unchanged `value` when it is not "required".

        Raises:
            ValueError: If `value` is "required".
        """
        if value == "required":
            raise ValueError(
                "Cannot use `CustomCheck` with `type='required'`."
                " Use `RequiredCheck` to set properties as required instead."
            )
        return value

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the custom check to the properties.

        The fields returned by `_get_fields_at_jsonpath` for `jsonpath` are
        each passed to `check`; every field whose value does not satisfy the
        check produces one `Issue`.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, one per field that fails the check. Each issue
            carries the failing field's JSON path and this check's `type` and
            `message`.
        """
        fields: list[PropertyField] = _get_fields_at_jsonpath(
            self.jsonpath,
            properties,
        )
        # Keep only the fields that violate the check.
        failing_fields: list[PropertyField] = keep(
            fields,
            lambda field: not self.check(field.value),
        )
        return fmap(
            failing_fields,
            lambda field: Issue(
                jsonpath=field.jsonpath, type=self.type, message=self.message
            ),
        )

94 

95 

@dataclass(frozen=True)
class TargetJsonPath:
    """The parent/field split of a JSON path that a `RequiredCheck` targets.

    Attributes:
        parent (str): JSON path of the location expected to hold the field.
        field (str): Name of the field that is targeted under `parent`.
    """

    parent: str
    field: str

107 

108 

def _jsonpath_to_targets(jsonpath: JSONPath) -> list[TargetJsonPath]:
    """Create a list of `TargetJsonPath`s from a `JSONPath`.

    The selectors of the last segment supply the targeted field name(s); all
    preceding segments together form the parent path.

    Args:
        jsonpath: The compiled JSON path to split into parent and field(s).

    Returns:
        One `TargetJsonPath` per name selector in the last segment, or an empty
        list if the path has no segments.

    Raises:
        ValueError: If the path ends in the recursive descent (`..`) operator,
            or if any selector in its last segment is not a name selector.
    """
    # Segments are path parts, e.g., `resources`, `*`, `name` for `$.resources[*].name`
    if not jsonpath.segments:
        return []

    # Path text read from the first segment's token; only used in error messages.
    full_path = jsonpath.segments[0].token.path
    last_segment = jsonpath.segments[-1]
    if isinstance(last_segment, JSONPathRecursiveDescentSegment):
        raise ValueError(
            f"Cannot use the JSON path `{full_path}` in `RequiredCheck`"
            " because it ends in the recursive descent (`..`) operator."
        )

    # Things like field names, array indices, and/or wildcards.
    selectors = last_segment.selectors
    if not all(isinstance(selector, NameSelector) for selector in selectors):
        raise ValueError(
            f"Cannot use `RequiredCheck` for the JSON path `{full_path}`"
            " because it doesn't end in a name selector."
        )

    # The parent path is identical for every selector, so it is rebuilt from
    # the leading segments and round-tripped through `compile`/`str` only once.
    parent = str(compile("".join(fmap(jsonpath.segments[:-1], str))))
    # All selectors were verified above to be name selectors; the tuple may
    # hold any number of them.
    name_selectors = cast(tuple[NameSelector, ...], selectors)
    return fmap(
        name_selectors,
        lambda selector: TargetJsonPath(parent=parent, field=selector.name),
    )

139 

140 

class RequiredCheck(BaseModel, frozen=True):
    """Set a specific property as required.

    Attributes:
        jsonpath (str): The location of the field or fields, expressed in [JSON
            path](https://jg-rp.github.io/python-jsonpath/syntax/) notation, to which
            the check applies (e.g., `$.resources[*].name`). The path must end
            in a field name; paths ending in the recursive descent (`..`)
            operator or in a non-name selector are rejected during validation.
        message (str): The message that is shown when the check fails.

    Examples:
        ```{python}
        import check_datapackage as cdp
        required_title_check = cdp.RequiredCheck(
            jsonpath="$.title",
            message="A title is required.",
        )
        ```
    """

    jsonpath: JsonPath
    message: str
    # Parent/field pairs derived from `jsonpath` by the model validator below.
    _targets: list[TargetJsonPath] = PrivateAttr()

    @model_validator(mode="after")
    def _check_field_name_in_jsonpath(self) -> Self:
        """Validate `jsonpath` and derive `_targets` from it.

        Returns:
            The model instance itself, with `_targets` populated.

        Raises:
            ValueError: Propagated from `_jsonpath_to_targets` when a path does
                not end in a name selector.
        """
        jsonpath = compile(self.jsonpath)
        if isinstance(jsonpath, JSONPath):
            paths = [jsonpath]
        else:
            # Not a plain `JSONPath`: presumably a compound path, whose `path`
            # is the first path and whose `paths` entries hold a further path
            # as their second item.
            first_path = cast(JSONPath, jsonpath.path)
            paths = [first_path] + fmap(jsonpath.paths, itemgetter(1))

        # Set through `object.__setattr__`, bypassing the model's own attribute
        # assignment (presumably because the model is frozen).
        object.__setattr__(self, "_targets", flat_fmap(paths, _jsonpath_to_targets))
        return self

    def apply(self, properties: dict[str, Any]) -> list[Issue]:
        """Applies the required check to the properties.

        Args:
            properties: The properties to check.

        Returns:
            A list of `Issue`s, one for each targeted field that is absent from
            the paths matched by `jsonpath`.
        """
        matching_paths = _get_direct_jsonpaths(self.jsonpath, properties)
        return flat_fmap(
            self._targets,
            lambda target: self._target_to_issues(target, matching_paths, properties),
        )

    def _target_to_issues(
        self,
        target: TargetJsonPath,
        matching_paths: list[str],
        properties: dict[str, Any],
    ) -> list[Issue]:
        """Create a list of `Issue`s from a `TargetJsonPath`.

        Args:
            target: The parent path and field name to look for.
            matching_paths: The paths that `jsonpath` matched in `properties`.
            properties: The properties to check.

        Returns:
            One "required" `Issue` for every parent path whose expected field
            path is not among `matching_paths`.
        """
        direct_parent_paths = _get_direct_jsonpaths(target.parent, properties)
        # Build the lookup set once so each membership test below is constant
        # time instead of a scan over the whole list.
        present_paths = set(matching_paths)
        # Path at which the field is expected under each parent.
        expected_paths = fmap(
            direct_parent_paths,
            lambda path: f"{path}.{target.field}",
        )
        missing_paths = keep(
            expected_paths,
            lambda path: path not in present_paths,
        )
        return fmap(
            missing_paths,
            lambda path: Issue(
                jsonpath=path,
                type="required",
                message=self.message,
                instance=MISSING,
            ),
        )

212 

213 

class Extensions(BaseModel, frozen=True):
    """User-defined checks that supplement the standard checks.

    `Extensions` is a sub-item of `Config`. It groups additional checks that
    go beyond those specified by the Data Package standard, with one
    sub-item per kind of check. More kinds of extensions can be added to this
    class later.

    Every extension class is expected to provide an `apply()` method that
    accepts the `datapackage.json` properties as a `dict` and returns the list
    of `Issue`s found by that extension.

    Attributes:
        required_checks: `RequiredCheck` objects naming the properties that
            should be treated as required. Defaults to an empty list.
        custom_checks: `CustomCheck` objects describing extra checks to run
            alongside the standard checks. Defaults to an empty list.

    Examples:
        ```{python}
        import check_datapackage as cdp

        extensions = cdp.Extensions(
            required_checks=[
                cdp.RequiredCheck(
                    jsonpath="$.description",
                    message="Data Packages must include a description.",
                ),
                cdp.RequiredCheck(
                    jsonpath="$.contributors[*].email",
                    message="All contributors must have an email address.",
                ),
            ],
            custom_checks=[
                cdp.CustomCheck(
                    type="only-mit",
                    jsonpath="$.licenses[*].name",
                    message="Data Packages may only be licensed under MIT.",
                    check=lambda license_name: license_name == "mit",
                )
            ],
        )
        # check(properties, config=cdp.Config(extensions=extensions))
        ```
    """

    required_checks: list[RequiredCheck] = []
    custom_checks: list[CustomCheck] = []

262 

263 

def apply_extensions(
    properties: dict[str, Any],
    extensions: Extensions,
) -> list[Issue]:
    """Run every configured extension check against the properties.

    The required checks are listed first, followed by the custom checks; each
    check's `apply()` is called with `properties` and the results are merged
    into a single list by `flat_fmap`.

    Args:
        properties: The properties to check.
        extensions: The user-defined extensions to apply to the properties.

    Returns:
        A list of `Issue`s.
    """
    all_checks: list[CustomCheck | RequiredCheck] = [
        *extensions.required_checks,
        *extensions.custom_checks,
    ]

    def _issues_for(check: CustomCheck | RequiredCheck) -> list[Issue]:
        # Both check classes expose the same `apply(properties)` interface.
        return check.apply(properties)

    return flat_fmap(all_checks, _issues_for)