Coverage for HMpTy/mysql/conesearch.py : 77%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Perform a conesearch on a database table with existing HTMId columns pre-populated*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from __future__ import division
11from future import standard_library
12standard_library.install_aliases()
13from builtins import zip
14from builtins import str
15from builtins import map
16from builtins import object
17from past.utils import old_div
18import sys
19import os
20import re
21os.environ['TERM'] = 'vt100'
22from fundamentals import tools
23from HMpTy.htm import HTM
24import numpy as np
25from fundamentals.mysql import readquery
26from io import StringIO
27import copy
30class conesearch(object):
31 """
32 *The worker class for the conesearch module*
34 **Key Arguments**
36 - ``log`` -- logger
37 - ``dbConn`` -- a database connection
38 - ``tableName`` -- the name of the database table to perform the conesearch on.
39 - ``columns`` -- the columns requested from the database table
40 - ``ra`` -- the right ascension of the conesearch centre, can be single value or list of values
41 - ``dec`` -- the declination of the conesearch centre, can be single value or list of values
42 - ``radiusArcsec`` -- radius of the conesearch to be performed in arcsecs
43 - ``sqlWhere`` -- clause to add after "where" in the initial sql query of the conesearch. Default *False*
44 - ``raCol`` -- the database table ra column name. Default * raDeg*
45 - ``decCol`` -- the database table dec column name. Default *decDeg*
48 - ``separations`` -- include the separations in the final output. Default *False*
49 - ``distinct`` -- request distinct columns from the database table (i.e. *select DISTINCT ...*). Default *False*
50 - ``closest`` -- return the closest match only. Default *False*
52 **Usage**
54 Say we have 5 locations we wish to search a database table called *transientBucket* to see if it contains sources at those locations. Add the coordinates to those locations to RA and DEC lists like so:
56 ```python
57 raList = ["23:25:53.56", "02:10:08.16",
58 "13:20:00.00", 1.47329, 35.34279]
59 decList = ["+26:54:23.9", "-48:38:24.3",
60 "+24:18:00.00", 8.43016, -42.34428]
61 ```
63 Note coorinates can be in decimal degrees or sexegesimal format (or both).
65 To initialise a 10 arcsec conesearch to return the *transientBucketId* and *spectralType* values from any resulting match use the code:
67 ```python
68 from HMpTy.mysql import conesearch
69 cs = conesearch(
70 log=log,
71 dbConn=dbConn,
72 tableName="transientBucket",
73 columns="transientBucketId, spectralType",
74 ra=raList,
75 dec=decList,
76 radiusArcsec=10,
77 separations=False,
78 distinct=False,
79 sqlWhere=False
80 )
81 ```
83 Using the ``query`` property of the coneseach object you can inspect the initial sql query to be run on the database:
85 ```python
86 print(cs.query)
87 ```
89 ```text
90 select transientBucketId, spectralType, raDeg, decDeg from transientBucket where htm16ID in (51985593986,51985593989,51985593993,51985593994,51985593995,51985593996,51985593997,51985593998,51985593999,51985594037, ... ,38488627914,38488627916,38488627918,38488627919,38488627956,38488627957,38488627959)
91 ```
93 To execute the query and return the results:
95 ```python
96 matchIndies, matches = cs.search()
97 ```
99 The ``matchIndies`` are the indices of the coordinate in the original ``raList`` and ``decList`` lists and the ``matches`` the matched rows from the database table.
101 To constrain your results a little more define the ``distinct`` and or ``sqlWhere`` attributes of the conesearch:
103 ```python
104 from HMpTy.mysql import conesearch
105 cs = conesearch(
106 log=log,
107 dbConn=dbConn,
108 tableName="transientBucket",
109 columns="transientBucketId, spectralType",
110 ra=raList1,
111 dec=decList1,
112 radiusArcsec=10,
113 separations=True,
114 distinct=True,
115 sqlWhere="spectralType is not null"
116 )
119 matchIndies, matches = cs.search()
121 for row in matches.list:
122 print(row)
123 ```
125 ```text
126 {'raDeg': 351.473208333, 'cmSepArcsec': 0.13379807128325164, 'decDeg': 26.9066388889, 'spectralType': u'SN Ia', 'transientBucketId': 1375799L}
127 {'raDeg': 32.534, 'cmSepArcsec': 0.031941633602975743, 'decDeg': -48.6400888889, 'spectralType': u'II', 'transientBucketId': 1328883L}
128 {'raDeg': 1.47329166667, 'cmSepArcsec': 0.0068727452774991196, 'decDeg': 8.43016111111, 'spectralType': u'SN Ia', 'transientBucketId': 1321322L}
129 {'raDeg': 35.3427916667, 'cmSepArcsec': 0.0043467057710126393, 'decDeg': -42.3442805556, 'spectralType': u'Ia', 'transientBucketId': 1307226L}
130 ```
132 Note that by adding ``separations=True`` the matched source seperations from the original coordinate lists have been injected into the results.
134 It is possible to render the results in csv, json, markdown, yaml or ascii table format. For example:
136 ```python
137 print(matches.table())
138 ```
140 ```text
141 +-----------+---------------+-----------+--------------+--------------------+
142 | raDeg | spectralType | decDeg | cmSepArcsec | transientBucketId |
143 +-----------+---------------+-----------+--------------+--------------------+
144 | 351.4732 | SN Ia | 26.9066 | 0.1338 | 1375799 |
145 | 32.5340 | II | -48.6401 | 0.0319 | 1328883 |
146 | 1.4733 | SN Ia | 8.4302 | 0.0069 | 1321322 |
147 | 35.3428 | Ia | -42.3443 | 0.0043 | 1307226 |
148 +-----------+---------------+-----------+--------------+--------------------+
149 ```
151 To save the results to file:
153 ```python
154 matches.table(filepath="/path/to/my/results.dat")
155 ```
157 To instead render as csv, json, markdown or yaml use:
159 ```python
160 matches.csv(filepath="/path/to/my/results.csv")
161 matches.json(filepath="/path/to/my/results.json")
162 matches.markdown(filepath="/path/to/my/results.md")
163 matches.markdown(filepath="/path/to/my/results.yaml")
164 ```
166 Finally, to render the results as mysql inserts, use the following code:
168 ```python
169 matches.mysql(tableName="mysql_table", filepath=None, createStatement=False)
170 ```
172 ```text
173 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.133798071283" ,"26.9066388889" ,"351.473208333" ,"SN Ia" ,"1375799") ON DUPLICATE KEY UPDATE cmSepArcsec="0.133798071283", decDeg="26.9066388889", raDeg="351.473208333", spectralType="SN Ia", transientBucketId="1375799", updated=IF( cmSepArcsec="0.133798071283" AND decDeg="26.9066388889" AND raDeg="351.473208333" AND spectralType="SN Ia" AND transientBucketId="1375799", 0, 1), dateLastModified=IF( cmSepArcsec="0.133798071283" AND decDeg="26.9066388889" AND raDeg="351.473208333" AND spectralType="SN Ia" AND transientBucketId="1375799", dateLastModified, NOW()) ;
174 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.031941633603" ,"-48.6400888889" ,"32.534" ,"II" ,"1328883") ON DUPLICATE KEY UPDATE cmSepArcsec="0.031941633603", decDeg="-48.6400888889", raDeg="32.534", spectralType="II", transientBucketId="1328883", updated=IF( cmSepArcsec="0.031941633603" AND decDeg="-48.6400888889" AND raDeg="32.534" AND spectralType="II" AND transientBucketId="1328883", 0, 1), dateLastModified=IF( cmSepArcsec="0.031941633603" AND decDeg="-48.6400888889" AND raDeg="32.534" AND spectralType="II" AND transientBucketId="1328883", dateLastModified, NOW()) ;
175 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.0068727452775" ,"8.43016111111" ,"1.47329166667" ,"SN Ia" ,"1321322") ON DUPLICATE KEY UPDATE cmSepArcsec="0.0068727452775", decDeg="8.43016111111", raDeg="1.47329166667", spectralType="SN Ia", transientBucketId="1321322", updated=IF( cmSepArcsec="0.0068727452775" AND decDeg="8.43016111111" AND raDeg="1.47329166667" AND spectralType="SN Ia" AND transientBucketId="1321322", 0, 1), dateLastModified=IF( cmSepArcsec="0.0068727452775" AND decDeg="8.43016111111" AND raDeg="1.47329166667" AND spectralType="SN Ia" AND transientBucketId="1321322", dateLastModified, NOW()) ;
176 INSERT INTO `mysql_table` (cmSepArcsec,decDeg,raDeg,spectralType,transientBucketId) VALUES ("0.00434670577101" ,"-42.3442805556" ,"35.3427916667" ,"Ia" ,"1307226") ON DUPLICATE KEY UPDATE cmSepArcsec="0.00434670577101", decDeg="-42.3442805556", raDeg="35.3427916667", spectralType="Ia", transientBucketId="1307226", updated=IF( cmSepArcsec="0.00434670577101" AND decDeg="-42.3442805556" AND raDeg="35.3427916667" AND spectralType="Ia" AND transientBucketId="1307226", 0, 1), dateLastModified=IF( cmSepArcsec="0.00434670577101" AND decDeg="-42.3442805556" AND raDeg="35.3427916667" AND spectralType="Ia" AND transientBucketId="1307226", dateLastModified, NOW()) ;
177 ```
178 """
179 # Initialisation
181 def __init__(
182 self,
183 log,
184 dbConn,
185 tableName,
186 columns,
187 ra,
188 dec,
189 radiusArcsec,
190 sqlWhere=False,
191 raCol="raDeg",
192 decCol="decDeg",
193 separations=False,
194 distinct=False,
195 closest=False
196 ):
197 self.log = log
198 log.debug("instansiating a new 'conesearch' object")
199 self.tableName = tableName
200 self.dbConn = dbConn
201 self.radius = float(radiusArcsec)
202 self.raCol = raCol
203 self.decCol = decCol
204 self.columns = columns
205 self.separations = separations
206 self.distinct = distinct
207 self.sqlWhere = sqlWhere
208 self.closest = closest
210 if not self.columns:
211 self.columns = "*"
213 # xt-self-arg-tmpx
215 # BULK COORDINATE INTO NUMPY ARRAY
216 from astrocalc.coords import coordinates_to_array
217 self.ra, self.dec = coordinates_to_array(
218 log=self.log,
219 ra=ra,
220 dec=dec
221 )
223 # SETUP THE MESH
224 # LESS THAN 1 ARCMIN
225 if self.radius < 60.:
226 self.htmDepth = 16
227 # LESS THAN 1 DEG BUT GREATER THAN 1 ARCMIN
228 elif old_div(self.radius, (60 * 60)) < 1.:
229 self.htmDepth = 13
230 # GREATER THAN 1 DEGREE
231 else:
232 self.htmDepth = 10
234 # SETUP A MESH AT CHOOSEN DEPTH
235 self.mesh = HTM(
236 depth=self.htmDepth,
237 log=self.log
238 )
240 # DATETIME REGEX - EXPENSIVE OPERATION, LET"S JUST DO IT ONCE
241 self.reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
243 return None
245 @property
246 def query(
247 self):
248 """*return the sql query for the HTM trixel search*
250 **Usage**
252 cs.query
254 """
255 return self._get_on_trixel_sources_from_database_query()
257 def search(self):
258 """
259 *Return the results of the database conesearch*
261 **Return**
263 - ``conesearch``
266 **Usage**
268 See class usage.
270 """
271 self.log.debug('starting the ``get`` method')
273 sqlQuery = self._get_on_trixel_sources_from_database_query()
275 databaseRows = self._execute_query(sqlQuery)
276 matchIndies, matches = self._list_crossmatch(databaseRows)
278 from fundamentals.renderer import list_of_dictionaries
279 matches = list_of_dictionaries(
280 log=self.log,
281 listOfDictionaries=matches,
282 reDatetime=self.reDatetime
283 )
285 self.log.debug('completed the ``get`` method')
286 return matchIndies, matches
288 def _get_on_trixel_sources_from_database_query(
289 self):
290 """*generate the mysql query before executing it*
291 """
292 self.log.debug(
293 'completed the ````_get_on_trixel_sources_from_database_query`` method')
295 tableName = self.tableName
296 raCol = self.raCol
297 decCol = self.decCol
298 radiusArc = self.radius
299 radius = old_div(self.radius, (60. * 60.))
301 # GET ALL THE TRIXELS REQUIRED
302 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles()
303 if trixelArray.size > 50000 and self.htmDepth == 16:
304 self.htmDepth = 13
305 self.mesh = HTM(
306 depth=self.htmDepth,
307 log=self.log
308 )
309 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles()
310 if trixelArray.size > 50000 and self.htmDepth == 13:
311 self.htmDepth = 10
312 self.mesh = HTM(
313 depth=self.htmDepth,
314 log=self.log
315 )
316 trixelArray = self._get_trixel_ids_that_overlap_conesearch_circles()
318 htmLevel = "htm%sID" % self.htmDepth
319 if trixelArray.size > 150000:
320 self.log.info(
321 "Your search radius of the `%(tableName)s` table may be too large (%(radiusArc)s arcsec)" % locals())
322 minID = np.min(trixelArray)
323 maxID = np.max(trixelArray)
324 htmWhereClause = "where %(htmLevel)s between %(minID)s and %(maxID)s " % locals(
325 )
326 else:
327 thesHtmIds = ",".join(np.array(list(map(str, trixelArray))))
328 htmWhereClause = "where %(htmLevel)s in (%(thesHtmIds)s)" % locals(
329 )
331 cols = self.columns[:]
332 if cols != "*" and raCol.lower() not in cols.lower():
333 cols += ", " + raCol
334 if cols != "*" and decCol.lower() not in cols.lower():
335 cols += ", " + decCol
337 # FINALLY BUILD THE FULL QUERY
338 if self.distinct:
339 sqlQuery = """select DISTINCT %(cols)s from %(tableName)s %(htmWhereClause)s""" % locals(
340 )
341 else:
342 sqlQuery = """select %(cols)s from %(tableName)s %(htmWhereClause)s""" % locals(
343 )
345 if self.sqlWhere and len(self.sqlWhere):
346 sqlQuery += " and " + self.sqlWhere
348 self.log.debug(
349 'completed the ``_get_on_trixel_sources_from_database_query`` method')
351 return sqlQuery
353 def _get_trixel_ids_that_overlap_conesearch_circles(
354 self):
355 """*Get an array of all of the trixels IDs that overlap the conesearch circles(s)*
357 **Return**
359 - ``trixelArray`` -- an array of all the overlapping trixel ids
361 """
362 self.log.debug(
363 'starting the ````_get_trixel_ids_that_overlap_conesearch_circles`` method')
365 trixelArray = np.array([], dtype='int16', ndmin=1, copy=False)
366 # FOR EACH RA, DEC SET IN THE NUMPY ARRAY, COLLECT THE OVERLAPPING HTM
367 # TRIXELS
368 r = old_div(self.radius, (60. * 60.))
370 trixelArray = []
372 trixelArray[:] = [self.mesh.intersect(
373 ra1, dec1, r, inclusive=True, convertCoordinates=False) for ra1, dec1 in zip(self.ra, self.dec)]
375 trixelArray = np.unique(np.concatenate(trixelArray))
377 self.log.debug(
378 'completed the ``_get_trixel_ids_that_overlap_conesearch_circles`` method')
379 return trixelArray
381 def _execute_query(
382 self,
383 sqlQuery):
384 """* execute query and trim results*
386 **Key Arguments**
388 - ``sqlQuery`` -- the sql database query to grab low-resolution results.
391 **Return**
393 - ``databaseRows`` -- the database rows found on HTM trixles with requested IDs
395 """
396 self.log.debug(
397 'completed the ````_execute_query`` method')
399 try:
400 databaseRows = readquery(
401 log=self.log,
402 sqlQuery=sqlQuery,
403 dbConn=self.dbConn
404 )
405 except Exception as e:
406 if "Unknown column 'htm" in str(e):
407 message = "Please add and populate the HTM columns to this database table BEFORE running any conesearches. You can use HMpTy to do this: http://hmpty.readthedocs.io/en/stable/"
408 self.log.error(message)
409 raise IOError(message)
410 elif "Truncated incorrect DOUBLE value" in str(e) or "Truncated incorrect DECIMAL value" in str(e):
411 databaseRows = readquery(
412 log=self.log,
413 sqlQuery=sqlQuery,
414 dbConn=self.dbConn,
415 quiet=True
416 )
417 else:
418 print(sqlQuery)
419 raise e
421 if self.distinct and (self.columns != "*" and (self.raCol.lower() not in self.columns.lower() or self.decCol.lower() not in self.columns.lower())):
422 distinctRows = []
423 theseKeys = []
424 for r in databaseRows:
425 constraintKey = ""
426 for k, v in list(r.items()):
427 if k.lower() != self.raCol.lower() and k.lower() != self.decCol.lower():
428 constraintKey += str(v)
429 if self.raCol.lower() in self.columns.lower():
430 constraintKey += str(databaseRows[self.raCol])
431 if self.decCol.lower() in self.columns.lower():
432 constraintKey += str(databaseRows[self.decCol])
433 if constraintKey not in theseKeys:
434 theseKeys.append(constraintKey)
435 distinctRows.append(r)
436 databaseRows = distinctRows
438 self.log.debug(
439 'completed the ``_execute_query`` method')
440 return databaseRows
442 def _list_crossmatch(
443 self,
444 dbRows):
445 """*to a finer grain crossmatch of the input coordinates and the database results.*
447 **Key Arguments**
449 - ``dbRows`` -- the rows return from the database on first crossmatch pass.
452 **Return**
454 - ``matchIndices1`` -- indices of the coordinate in the original ra and dec lists
455 - ``matches`` -- the matched database rows
457 """
458 self.log.debug('starting the ``_list_crossmatch`` method')
460 dbRas = []
461 dbRas[:] = [d[self.raCol] for d in dbRows]
462 dbDecs = []
463 dbDecs[:] = [d[self.decCol] for d in dbRows]
465 # 12 SEEMS TO BE GIVING OPTIMAL SPEED FOR MATCHES (VERY ROUGH SPEED
466 # TESTS)
467 mesh = HTM(
468 depth=12,
469 log=self.log
470 )
472 if self.closest:
473 maxmatch = 1
474 else:
475 maxmatch = 0
476 matchIndices1, matchIndices2, seps = mesh.match(
477 ra1=self.ra,
478 dec1=self.dec,
479 ra2=np.array(dbRas),
480 dec2=np.array(dbDecs),
481 radius=float(old_div(self.radius, (60. * 60.))),
482 maxmatch=maxmatch # 1 = match closest 1, 0 = match all
483 )
485 matches = []
487 for m1, m2, s in zip(matchIndices1, matchIndices2, seps):
489 if self.separations:
491 dbRows[m2]["cmSepArcsec"] = s * (60. * 60.)
492 # DEEPCOPY NEEDED IF SAME ELEMENT MATCHED BY 2 SEPERATE
493 # ITEMS IN FIRST LIST
494 matches.append(copy.deepcopy(dbRows[m2]))
496 self.log.debug('completed the ``_list_crossmatch`` method')
497 return matchIndices1, matches
499 # xt-class-method