Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from __future__ import division
11from builtins import zip
12from builtins import str
13from builtins import range
14from past.utils import old_div
15import sys
16import os
17import math
18os.environ['TERM'] = 'vt100'
19from fundamentals import tools
20import pymysql as ms
21from fundamentals.mysql import readquery, writequery, insert_list_of_dictionaries_into_database_tables
22from datetime import datetime, date
23from fundamentals import times
24import time
26def add_htm_ids_to_mysql_database_table(
27 raColName,
28 declColName,
29 tableName,
30 dbConn,
31 log,
32 primaryIdColumnName="primaryId",
33 cartesian=False,
34 batchSize=50000,
35 reindex=False,
36 dbSettings=False):
37 """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table*
39 **Key Arguments**
41 - ``raColName`` -- ra in sexegesimal
42 - ``declColName`` -- dec in sexegesimal
43 - ``tableName`` -- name of table to add htmid info to
44 - ``dbConn`` -- database hosting the above table
45 - ``log`` -- logger
46 - ``primaryIdColumnName`` -- the primary id for the table
47 - ``cartesian`` -- add cartesian columns. Default *False*
48 - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *2500*
49 - ``reindex`` -- reindex the entire table
50 - ``dbSettings`` -- yaml settings for database
53 **Return**
55 - None
58 **Usage**
60 ```python
61 from HMpTy.mysql import add_htm_ids_to_mysql_database_table
62 add_htm_ids_to_mysql_database_table(
63 raColName="raDeg",
64 declColName="decDeg",
65 tableName="my_big_star_table",
66 dbConn=dbConn,
67 log=log,
68 primaryIdColumnName="primaryId",
69 reindex=False
70 )
71 ```
73 """
74 log.debug('starting the ``add_htm_ids_to_mysql_database_table`` function')
76 # TEST TABLE EXIST
77 sqlQuery = """show tables"""
78 rows = readquery(
79 log=log,
80 sqlQuery=sqlQuery,
81 dbConn=dbConn
82 )
84 log.debug(
85 """Checking the table %(tableName)s exists in the database""" % locals())
86 tableList = []
87 for row in rows:
88 tableList.append(list(row.values())[0].lower())
89 if tableName.lower() not in tableList:
90 message = "The %s table does not exist in the database" % (tableName,)
91 log.critical(message)
92 raise IOError(message)
94 log.debug(
95 """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals())
96 # TEST COLUMNS EXISTS
97 cursor = dbConn.cursor(ms.cursors.DictCursor)
98 sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,)
99 cursor.execute(sqlQuery)
100 rows = cursor.fetchall()
101 desc = cursor.description
102 existingColumns = []
103 for i in range(len(desc)):
104 existingColumns.append(desc[i][0])
105 if (raColName not in existingColumns) or (declColName not in existingColumns):
106 message = 'Please make sure you have got the naes of the RA and DEC columns correct'
107 log.critical(message)
108 raise IOError(message)
110 if cartesian:
111 # ACTION(S) ##
112 htmCols = {
113 'htm16ID': 'BIGINT(20)',
114 'htm13ID': 'INT',
115 'htm10ID': 'INT',
116 'cx': 'DOUBLE',
117 'cy': 'DOUBLE',
118 'cz': 'DOUBLE'
119 }
120 else:
121 htmCols = {
122 'htm16ID': 'BIGINT(20)',
123 'htm13ID': 'INT',
124 'htm10ID': 'INT'
125 }
127 # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM
128 for key in list(htmCols.keys()):
129 try:
130 log.debug(
131 'attempting to check and generate the HTMId columns for the %s db table' %
132 (tableName, ))
133 colExists = \
134 """SELECT *
135 FROM information_schema.COLUMNS
136 WHERE TABLE_SCHEMA=DATABASE()
137 AND COLUMN_NAME='%s'
138 AND TABLE_NAME='%s'""" \
139 % (key, tableName)
140 colExists = readquery(
141 log=log,
142 sqlQuery=colExists,
143 dbConn=dbConn
144 )
145 switch = 0
146 if not colExists:
147 if switch == 0:
148 print("Adding the HTMCircle columns to %(tableName)s" % locals())
149 switch = 1
150 sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
151 key + ' ' + htmCols[key] + ' DEFAULT NULL'
152 writequery(
153 log=log,
154 sqlQuery=sqlQuery,
155 dbConn=dbConn,
156 )
157 except Exception as e:
158 log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s '
159 % (tableName, str(e)))
160 raise e
162 log.debug(
163 """Counting the number of rows still requiring HTMID information""" % locals())
164 if reindex:
165 sqlQuery = u"""
166 SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS
167 WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s";
168 """ % locals()
169 keyname = readquery(
170 log=log,
171 sqlQuery=sqlQuery,
172 dbConn=dbConn
173 )[0]["INDEX_NAME"]
174 if keyname != "PRIMARY":
175 log.error('To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals())
176 return
178 sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals()
179 writequery(
180 log=log,
181 sqlQuery=sqlQuery,
182 dbConn=dbConn
183 )
185 sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals(
186 )
187 elif cartesian:
188 # COUNT ROWS WHERE HTMIDs ARE NOT SET
189 sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null and %(raColName)s is not null""" % locals(
190 )
191 else:
192 # COUNT ROWS WHERE HTMIDs ARE NOT SET
193 sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL and %(raColName)s is not null""" % locals(
194 )
195 log.debug(
196 """SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals())
197 rowCount = readquery(
198 log=log,
199 sqlQuery=sqlQuery,
200 dbConn=dbConn,
201 quiet=False
202 )
203 totalCount = rowCount[0]["count"]
205 # ADD HTMIDs IN BATCHES
206 total = totalCount
207 batches = int(old_div(total, batchSize))
209 count = 0
210 lastId = False
211 # NOW GENERATE THE HTMLIds FOR THESE ROWS
212 for i in range(batches + 1):
213 if total == 0:
214 continue
215 count += batchSize
216 if count > batchSize:
217 # Cursor up one line and clear line
218 sys.stdout.write("\x1b[1A\x1b[2K")
219 if count > totalCount:
220 count = totalCount
222 start = time.time()
224 log.debug(
225 """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals())
226 if reindex:
227 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
228 if lastId:
229 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % (
230 primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize)
231 else:
232 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % (
233 primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize)
234 elif cartesian:
235 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
236 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and ((htm10ID is NULL or cx is null)) limit %s""" % (
237 primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
238 else:
239 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
240 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and htm10ID is NULL limit %s""" % (
241 primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize)
242 batch = readquery(
243 log=log,
244 sqlQuery=sqlQuery,
245 dbConn=dbConn
246 )
247 if reindex and len(batch):
248 lastId = batch[-1][primaryIdColumnName]
249 log.debug(
250 """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals())
252 raList = []
253 decList = []
254 pIdList = []
255 raList[:] = [r[raColName] for r in batch]
256 decList[:] = [r[declColName] for r in batch]
257 pIdList[:] = [r[primaryIdColumnName] for r in batch]
259 from HMpTy import htm
260 mesh16 = htm.HTM(16)
261 mesh13 = htm.HTM(13)
262 mesh10 = htm.HTM(10)
264 log.debug(
265 'calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, ))
266 htm16Ids = mesh16.lookup_id(raList, decList)
267 htm13Ids = mesh13.lookup_id(raList, decList)
268 htm10Ids = mesh10.lookup_id(raList, decList)
269 log.debug(
270 'finshed calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, ))
272 if cartesian:
273 log.debug(
274 'calculating cartesian coordinates for batch of %s rows in %s db table' % (batchSize, tableName, ))
275 cx = []
276 cy = []
277 cz = []
278 for r, d in zip(raList, decList):
279 r = math.radians(r)
280 d = math.radians(d)
281 cos_dec = math.cos(d)
282 cx.append(math.cos(r) * cos_dec)
283 cy.append(math.sin(r) * cos_dec)
284 cz.append(math.sin(d))
286 updates = []
287 updates[:] = [{"htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid, "cx": float(ccx), "cy": float(ccy), "cz": float(ccz)} for h16,
288 h13, h10, pid, ccx, ccy, ccz in zip(htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz)]
290 log.debug(
291 'finished calculating cartesian coordinates for batch of %s rows in %s db table' % (
292 batchSize, tableName, ))
293 else:
294 log.debug('building the sqlquery')
295 updates = []
296 # updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16,
297 # h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)]
298 updates[:] = [{"htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid} for h16,
299 h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)]
300 log.debug('finshed building the sqlquery')
302 if len(updates):
303 log.debug(
304 'starting to update the HTMIds for new objects in the %s db table' % (tableName, ))
306 # USE dbSettings & dbConn TO ACTIVATE MULTIPROCESSING
307 insert_list_of_dictionaries_into_database_tables(
308 dbConn=dbConn,
309 log=log,
310 dictList=updates,
311 dbTableName=tableName,
312 uniqueKeyList=[],
313 dateModified=False,
314 batchSize=20000,
315 replace=True,
316 dbSettings=dbSettings,
317 dateCreated=False
318 )
320 # writequery(
321 # log=log,
322 # sqlQuery=sqlQuery,
323 # dbConn=dbConn,
324 # )
325 log.debug(
326 'finished updating the HTMIds for new objects in the %s db table' % (tableName, ))
327 else:
328 log.debug(
329 'no HTMIds to add to the %s db table' % (tableName, ))
331 percent = float(count) * 100. / float(totalCount)
332 print("%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals())
333 end = time.time()
334 timediff = end - start
335 timediff = timediff * 1000000. / float(batchSize)
336 print("Update speed: %(timediff)0.2fs/1e6 rows\n" % locals())
338 # APPLY INDEXES IF NEEDED
339 sqlQuery = ""
340 for index in ["htm10ID", "htm13ID", "htm16ID"]:
341 log.debug('adding %(index)s index to %(tableName)s' % locals())
342 iname = "idx_" + index
343 asqlQuery = u"""
344 SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS
345 WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s';
346 """ % locals()
347 count = readquery(
348 log=log,
349 sqlQuery=asqlQuery,
350 dbConn=dbConn
351 )[0]["IndexIsThere"]
353 if count == 0:
354 if not len(sqlQuery):
355 sqlQuery += u"""
356 ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC)
357 """ % locals()
358 else:
359 sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals()
360 if len(sqlQuery):
361 writequery(
362 log=log,
363 sqlQuery=sqlQuery + ";",
364 dbConn=dbConn,
365 )
366 log.debug('finished adding indexes to %(tableName)s' % locals())
368 if reindex:
369 print("Re-enabling keys within the '%(tableName)s' table" % locals())
370 sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals()
371 writequery(
372 log=log,
373 sqlQuery=sqlQuery,
374 dbConn=dbConn
375 )
377 print("All HTMIds added to %(tableName)s" % locals())
379 log.debug('completed the ``add_htm_ids_to_mysql_database_table`` function')
380 return None