Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Perform a conesearch on a database table with existing HTMId columns pre-populated* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function 

10from __future__ import division 

11from future import standard_library 

12standard_library.install_aliases() 

13from builtins import zip 

14from builtins import str 

15from builtins import map 

16from builtins import object 

17from past.utils import old_div 

18import sys 

19import os 

20import re 

21os.environ['TERM'] = 'vt100' 

22from fundamentals import tools 

23from HMpTy.htm import HTM 

24import numpy as np 

25from fundamentals.mysql import readquery 

26from io import StringIO 

27import copy 

28 

29 

30class conesearch(object): 

31 """ 

32 *The worker class for the conesearch module* 

33 

34 **Key Arguments** 

35 

36 - ``log`` -- logger 

37 - ``dbConn`` -- a database connection 

38 - ``tableName`` -- the name of the database table to perform the conesearch on. 

39 - ``columns`` -- the columns requested from the database table 

40 - ``ra`` -- the right ascension of the conesearch centre, can be single value or list of values 

41 - ``dec`` -- the declination of the conesearch centre, can be single value or list of values 

42 - ``radiusArcsec`` -- radius of the conesearch to be performed in arcsecs 

43 - ``sqlWhere`` -- clause to add after "where" in the initial sql query of the conesearch. Default *False* 

44 - ``raCol`` -- the database table ra column name. Default * raDeg* 

45 - ``decCol`` -- the database table dec column name. Default *decDeg* 

46 

47 

48 - ``separations`` -- include the separations in the final output. Default *False* 

49 - ``distinct`` -- request distinct columns from the database table (i.e. *select DISTINCT ...*). Default *False* 

50 - ``closest`` -- return the closest match only. Default *False* 

51 

52 **Usage** 

53 

54 Say we have 5 locations we wish to search a database table called *transientBucket* to see if it contains sources at those locations. Add the coordinates to those locations to RA and DEC lists like so: 

55 

56 ```python 

57 raList = ["23:25:53.56", "02:10:08.16", 

58 "13:20:00.00", 1.47329, 35.34279] 

59 decList = ["+26:54:23.9", "-48:38:24.3", 

60 "+24:18:00.00", 8.43016, -42.34428] 

61 ``` 

62 

63 Note coorinates can be in decimal degrees or sexegesimal format (or both). 

64 

65 To initialise a 10 arcsec conesearch to return the *transientBucketId* and *spectralType* values from any resulting match use the code: 

66 

67 ```python 

68 from HMpTy.mysql import conesearch 

69 cs = conesearch( 

70 log=log, 

71 dbConn=dbConn, 

72 tableName="transientBucket", 

73 columns="transientBucketId, spectralType", 

74 ra=raList, 

75 dec=decList, 

76 radiusArcsec=10, 

77 separations=False, 

78 distinct=False, 

79 sqlWhere=False 

80 ) 

81 ``` 

82 

83 Using the ``query`` property of the coneseach object you can inspect the initial sql query to be run on the database: 

84 

85 ```python 

86 print(cs.query) 

87 ``` 

88 

89 ```text 

90 select transientBucketId, spectralType, raDeg, decDeg from transientBucket where htm16ID in (51985593986,51985593989,51985593993,51985593994,51985593995,51985593996,51985593997,51985593998,51985593999,51985594037, ... ,38488627914,38488627916,38488627918,38488627919,38488627956,38488627957,38488627959) 

91 ``` 

92 

93 To execute the query and return the results: 

94 

95 ```python 

96 matchIndies, matches = cs.search() 

97 ``` 

98 

99 The ``matchIndies`` are the indices of the coordinate in the original ``raList`` and ``decList`` lists and the ``matches`` the matched rows from the database table. 

100 

101 To constrain your results a little more define the ``distinct`` and or ``sqlWhere`` attributes of the conesearch: 

102 

103 ```python 

104 from HMpTy.mysql import conesearch 

105 cs = conesearch( 

106 log=log, 

107 dbConn=dbConn, 

108 tableName="transientBucket", 

109 columns="transientBucketId, spectralType", 

110 ra=raList1, 

111 dec=decList1, 

112 radiusArcsec=10, 

113 separations=True, 

114 distinct=True, 

115 sqlWhere="spectralType is not null" 

116 ) 

117 

118 

119 matchIndies, matches = cs.search() 

120 

121 for row in matches.list: 

122 print(row) 

123 ``` 

124 

125 ```text 

126 {'raDeg': 351.473208333, 'cmSepArcsec': 0.13379807128325164, 'decDeg': 26.9066388889, 'spectralType': u'SN Ia', 'transientBucketId': 1375799L} 

127 {'raDeg': 32.534, 'cmSepArcsec': 0.031941633602975743, 'decDeg': -48.6400888889, 'spectralType': u'II', 'transientBucketId': 1328883L} 

128 {'raDeg': 1.47329166667, 'cmSepArcsec': 0.0068727452774991196, 'decDeg': 8.43016111111, 'spectralType': u'SN Ia', 'transientBucketId': 1321322L} 

129 {'raDeg': 35.3427916667, 'cmSepArcsec': 0.0043467057710126393, 'decDeg': -42.3442805556, 'spectralType': u'Ia', 'transientBucketId': 1307226L} 

130 ``` 

131 

132 Note that by adding ``separations=True`` the matched source seperations from the original coordinate lists have been injected into the results. 

133 

134 It is possible to render the results in csv, json, markdown, yaml or ascii table format. For example: 

135 

136 ```python 

137 print(matches.table()) 

138 ``` 

139 

140 ```text 

141 +-----------+---------------+-----------+--------------+--------------------+ 

142 | raDeg | spectralType | decDeg | cmSepArcsec | transientBucketId | 

143 +-----------+---------------+-----------+--------------+--------------------+ 

144 | 351.4732 | SN Ia | 26.9066 | 0.1338 | 1375799 | 

145 | 32.5340 | II | -48.6401 | 0.0319 | 1328883 | 

146 | 1.4733 | SN Ia | 8.4302 | 0.0069 | 1321322 | 

147 | 35.3428 | Ia | -42.3443 | 0.0043 | 1307226 | 

148 +-----------+---------------+-----------+--------------+--------------------+ 

149 ``` 

150 

151 To save the results to file: 

152 

153 ```python 

154 matches.table(filepath="/path/to/my/results.dat") 

155 ``` 

156 

157 To instead render as csv, json, markdown or yaml use: 

158 

159 ```python 

160 matches.csv(filepath="/path/to/my/results.csv") 

161 matches.json(filepath="/path/to/my/results.json") 

162 matches.markdown(filepath="/path/to/my/results.md") 

163 matches.markdown(filepath="/path/to/my/results.yaml") 

164 ``` 

165 

166 Finally, to render the results as mysql inserts, use the following code: 

167 

168 ```python 

169 matches.mysql(tableName="mysql_table", filepath=None, createStatement=False) 

170 ``` 

171 

172 ```text 

173 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.133798071283" ,"26.9066388889" ,"351.473208333" ,"SN Ia" ,"1375799") ON DUPLICATE KEY UPDATE cmSepArcsec="0.133798071283", decDeg="26.9066388889", raDeg="351.473208333", spectralType="SN Ia", transientBucketId="1375799", updated=IF( cmSepArcsec="0.133798071283" AND decDeg="26.9066388889" AND raDeg="351.473208333" AND spectralType="SN Ia" AND transientBucketId="1375799", 0, 1), dateLastModified=IF( cmSepArcsec="0.133798071283" AND decDeg="26.9066388889" AND raDeg="351.473208333" AND spectralType="SN Ia" AND transientBucketId="1375799", dateLastModified, NOW()) ; 

174 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.031941633603" ,"-48.6400888889" ,"32.534" ,"II" ,"1328883") ON DUPLICATE KEY UPDATE cmSepArcsec="0.031941633603", decDeg="-48.6400888889", raDeg="32.534", spectralType="II", transientBucketId="1328883", updated=IF( cmSepArcsec="0.031941633603" AND decDeg="-48.6400888889" AND raDeg="32.534" AND spectralType="II" AND transientBucketId="1328883", 0, 1), dateLastModified=IF( cmSepArcsec="0.031941633603" AND decDeg="-48.6400888889" AND raDeg="32.534" AND spectralType="II" AND transientBucketId="1328883", dateLastModified, NOW()) ; 

175 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.0068727452775" ,"8.43016111111" ,"1.47329166667" ,"SN Ia" ,"1321322") ON DUPLICATE KEY UPDATE cmSepArcsec="0.0068727452775", decDeg="8.43016111111", raDeg="1.47329166667", spectralType="SN Ia", transientBucketId="1321322", updated=IF( cmSepArcsec="0.0068727452775" AND decDeg="8.43016111111" AND raDeg="1.47329166667" AND spectralType="SN Ia" AND transientBucketId="1321322", 0, 1), dateLastModified=IF( cmSepArcsec="0.0068727452775" AND decDeg="8.43016111111" AND raDeg="1.47329166667" AND spectralType="SN Ia" AND transientBucketId="1321322", dateLastModified, NOW()) ; 

176 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.00434670577101" ,"-42.3442805556" ,"35.3427916667" ,"Ia" ,"1307226") ON DUPLICATE KEY UPDATE cmSepArcsec="0.00434670577101", decDeg="-42.3442805556", raDeg="35.3427916667", spectralType="Ia", transientBucketId="1307226", updated=IF( cmSepArcsec="0.00434670577101" AND decDeg="-42.3442805556" AND raDeg="35.3427916667" AND spectralType="Ia" AND transientBucketId="1307226", 0, 1), dateLastModified=IF( cmSepArcsec="0.00434670577101" AND decDeg="-42.3442805556" AND raDeg="35.3427916667" AND spectralType="Ia" AND transientBucketId="1307226", dateLastModified, NOW()) ; 

177 ``` 

178 """ 

179 # Initialisation 

180 

181 def __init__( 

182 self, 

183 log, 

184 dbConn, 

185 tableName, 

186 columns, 

187 ra, 

188 dec, 

189 radiusArcsec, 

190 sqlWhere=False, 

191 raCol="raDeg", 

192 decCol="decDeg", 

193 separations=False, 

194 distinct=False, 

195 closest=False 

196 ): 

197 self.log = log 

198 log.debug("instansiating a new 'conesearch' object") 

199 self.tableName = tableName 

200 self.dbConn = dbConn 

201 self.radius = float(radiusArcsec) 

202 self.raCol = raCol 

203 self.decCol = decCol 

204 self.columns = columns 

205 self.separations = separations 

206 self.distinct = distinct 

207 self.sqlWhere = sqlWhere 

208 self.closest = closest 

209 

210 if not self.columns: 

211 self.columns = "*" 

212 

213 # xt-self-arg-tmpx 

214 

215 # BULK COORDINATE INTO NUMPY ARRAY 

216 from astrocalc.coords import coordinates_to_array 

217 self.ra, self.dec = coordinates_to_array( 

218 log=self.log, 

219 ra=ra, 

220 dec=dec 

221 ) 

222 

223 # SETUP THE MESH 

224 # LESS THAN 1 ARCMIN 

225 if self.radius < 60.: 

226 self.htmDepth = 16 

227 # LESS THAN 1 DEG BUT GREATER THAN 1 ARCMIN 

228 elif old_div(self.radius, (60 * 60)) < 1.: 

229 self.htmDepth = 13 

230 # GREATER THAN 1 DEGREE 

231 else: 

232 self.htmDepth = 10 

233 

234 # SETUP A MESH AT CHOOSEN DEPTH 

235 self.mesh = HTM( 

236 depth=self.htmDepth, 

237 log=self.log 

238 ) 

239 

240 # DATETIME REGEX - EXPENSIVE OPERATION, LET"S JUST DO IT ONCE 

241 self.reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') 

242 

243 return None 

244 

245 @property 

246 def query( 

247 self): 

248 """*return the sql query for the HTM trixel search* 

249 

250 **Usage** 

251 

252 cs.query 

253 

254 """ 

255 return self._get_on_trixel_sources_from_database_query() 

256 

257 def search(self): 

258 """ 

259 *Return the results of the database conesearch* 

260 

261 **Return** 

262 

263 - ``conesearch`` 

264 

265 

266 **Usage** 

267 

268 See class usage. 

269 

270 """ 

271 self.log.debug('starting the ``get`` method') 

272 

273 sqlQuery = self._get_on_trixel_sources_from_database_query() 

274 

275 databaseRows = self._execute_query(sqlQuery) 

276 matchIndies, matches = self._list_crossmatch(databaseRows) 

277 

278 from fundamentals.renderer import list_of_dictionaries 

279 matches = list_of_dictionaries( 

280 log=self.log, 

281 listOfDictionaries=matches, 

282 reDatetime=self.reDatetime 

283 ) 

284 

285 self.log.debug('completed the ``get`` method') 

286 return matchIndies, matches 

287 

288 def _get_on_trixel_sources_from_database_query( 

289 self): 

290 """*generate the mysql query before executing it* 

291 """ 

292 self.log.debug( 

293 'completed the ````_get_on_trixel_sources_from_database_query`` method') 

294 

295 tableName = self.tableName 

296 raCol = self.raCol 

297 decCol = self.decCol 

298 radiusArc = self.radius 

299 radius = old_div(self.radius, (60. * 60.)) 

300 

301 # GET ALL THE TRIXELS REQUIRED 

302 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles() 

303 if trixelArray.size > 50000 and self.htmDepth == 16: 

304 self.htmDepth = 13 

305 self.mesh = HTM( 

306 depth=self.htmDepth, 

307 log=self.log 

308 ) 

309 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles() 

310 if trixelArray.size > 50000 and self.htmDepth == 13: 

311 self.htmDepth = 10 

312 self.mesh = HTM( 

313 depth=self.htmDepth, 

314 log=self.log 

315 ) 

316 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles() 

317 

318 htmLevel = "htm%sID" % self.htmDepth 

319 if trixelArray.size > 150000: 

320 self.log.info( 

321 "Your search radius of the `%(tableName)s` table may be too large (%(radiusArc)s arcsec)" % locals()) 

322 minID = np.min(trixelArray) 

323 maxID = np.max(trixelArray) 

324 htmWhereClause = "where %(htmLevel)s between %(minID)s and %(maxID)s " % locals( 

325 ) 

326 else: 

327 thesHtmIds = ",".join(np.array(list(map(str, trixelArray)))) 

328 htmWhereClause = "where %(htmLevel)s in (%(thesHtmIds)s)" % locals( 

329 ) 

330 

331 cols = self.columns[:] 

332 if cols != "*" and raCol.lower() not in cols.lower(): 

333 cols += ", " + raCol 

334 if cols != "*" and decCol.lower() not in cols.lower(): 

335 cols += ", " + decCol 

336 

337 # FINALLY BUILD THE FULL QUERY 

338 if self.distinct: 

339 sqlQuery = """select DISTINCT %(cols)s from %(tableName)s %(htmWhereClause)s""" % locals( 

340 ) 

341 else: 

342 sqlQuery = """select %(cols)s from %(tableName)s %(htmWhereClause)s""" % locals( 

343 ) 

344 

345 if self.sqlWhere and len(self.sqlWhere): 

346 sqlQuery += " and " + self.sqlWhere 

347 

348 self.log.debug( 

349 'completed the ``_get_on_trixel_sources_from_database_query`` method') 

350 

351 return sqlQuery 

352 

353 def _get_trixel_ids_that_overlap_conesearch_circles( 

354 self): 

355 """*Get an array of all of the trixels IDs that overlap the conesearch circles(s)* 

356 

357 **Return** 

358 

359 - ``trixelArray`` -- an array of all the overlapping trixel ids 

360 

361 """ 

362 self.log.debug( 

363 'starting the ````_get_trixel_ids_that_overlap_conesearch_circles`` method') 

364 

365 trixelArray = np.array([], dtype='int16', ndmin=1, copy=False) 

366 # FOR EACH RA, DEC SET IN THE NUMPY ARRAY, COLLECT THE OVERLAPPING HTM 

367 # TRIXELS 

368 r = old_div(self.radius, (60. * 60.)) 

369 

370 trixelArray = [] 

371 

372 trixelArray[:] = [self.mesh.intersect( 

373 ra1, dec1, r, inclusive=True, convertCoordinates=False) for ra1, dec1 in zip(self.ra, self.dec)] 

374 

375 trixelArray = np.unique(np.concatenate(trixelArray)) 

376 

377 self.log.debug( 

378 'completed the ``_get_trixel_ids_that_overlap_conesearch_circles`` method') 

379 return trixelArray 

380 

381 def _execute_query( 

382 self, 

383 sqlQuery): 

384 """* execute query and trim results* 

385 

386 **Key Arguments** 

387 

388 - ``sqlQuery`` -- the sql database query to grab low-resolution results. 

389 

390 

391 **Return** 

392 

393 - ``databaseRows`` -- the database rows found on HTM trixles with requested IDs 

394 

395 """ 

396 self.log.debug( 

397 'completed the ````_execute_query`` method') 

398 

399 try: 

400 databaseRows = readquery( 

401 log=self.log, 

402 sqlQuery=sqlQuery, 

403 dbConn=self.dbConn 

404 ) 

405 except Exception as e: 

406 if "Unknown column 'htm" in str(e): 

407 message = "Please add and populate the HTM columns to this database table BEFORE running any conesearches. You can use HMpTy to do this: http://hmpty.readthedocs.io/en/stable/" 

408 self.log.error(message) 

409 raise IOError(message) 

410 elif "Truncated incorrect DOUBLE value" in str(e) or "Truncated incorrect DECIMAL value" in str(e): 

411 databaseRows = readquery( 

412 log=self.log, 

413 sqlQuery=sqlQuery, 

414 dbConn=self.dbConn, 

415 quiet=True 

416 ) 

417 else: 

418 print(sqlQuery) 

419 raise e 

420 

421 if self.distinct and (self.columns != "*" and (self.raCol.lower() not in self.columns.lower() or self.decCol.lower() not in self.columns.lower())): 

422 distinctRows = [] 

423 theseKeys = [] 

424 for r in databaseRows: 

425 constraintKey = "" 

426 for k, v in list(r.items()): 

427 if k.lower() != self.raCol.lower() and k.lower() != self.decCol.lower(): 

428 constraintKey += str(v) 

429 if self.raCol.lower() in self.columns.lower(): 

430 constraintKey += str(databaseRows[self.raCol]) 

431 if self.decCol.lower() in self.columns.lower(): 

432 constraintKey += str(databaseRows[self.decCol]) 

433 if constraintKey not in theseKeys: 

434 theseKeys.append(constraintKey) 

435 distinctRows.append(r) 

436 databaseRows = distinctRows 

437 

438 self.log.debug( 

439 'completed the ``_execute_query`` method') 

440 return databaseRows 

441 

442 def _list_crossmatch( 

443 self, 

444 dbRows): 

445 """*to a finer grain crossmatch of the input coordinates and the database results.* 

446 

447 **Key Arguments** 

448 

449 - ``dbRows`` -- the rows return from the database on first crossmatch pass. 

450 

451 

452 **Return** 

453 

454 - ``matchIndices1`` -- indices of the coordinate in the original ra and dec lists 

455 - ``matches`` -- the matched database rows 

456 

457 """ 

458 self.log.debug('starting the ``_list_crossmatch`` method') 

459 

460 dbRas = [] 

461 dbRas[:] = [d[self.raCol] for d in dbRows] 

462 dbDecs = [] 

463 dbDecs[:] = [d[self.decCol] for d in dbRows] 

464 

465 # 12 SEEMS TO BE GIVING OPTIMAL SPEED FOR MATCHES (VERY ROUGH SPEED 

466 # TESTS) 

467 mesh = HTM( 

468 depth=12, 

469 log=self.log 

470 ) 

471 

472 if self.closest: 

473 maxmatch = 1 

474 else: 

475 maxmatch = 0 

476 matchIndices1, matchIndices2, seps = mesh.match( 

477 ra1=self.ra, 

478 dec1=self.dec, 

479 ra2=np.array(dbRas), 

480 dec2=np.array(dbDecs), 

481 radius=float(old_div(self.radius, (60. * 60.))), 

482 maxmatch=maxmatch # 1 = match closest 1, 0 = match all 

483 ) 

484 

485 matches = [] 

486 

487 for m1, m2, s in zip(matchIndices1, matchIndices2, seps): 

488 

489 if self.separations: 

490 

491 dbRows[m2]["cmSepArcsec"] = s * (60. * 60.) 

492 # DEEPCOPY NEEDED IF SAME ELEMENT MATCHED BY 2 SEPERATE 

493 # ITEMS IN FIRST LIST 

494 matches.append(copy.deepcopy(dbRows[m2])) 

495 

496 self.log.debug('completed the ``_list_crossmatch`` method') 

497 return matchIndices1, matches 

498 

499 # xt-class-method