Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function 

10from __future__ import division 

11from builtins import zip 

12from builtins import str 

13from builtins import range 

14from past.utils import old_div 

15import sys 

16import os 

17import math 

18os.environ['TERM'] = 'vt100' 

19from fundamentals import tools 

20import pymysql as ms 

21from fundamentals.mysql import readquery, writequery, insert_list_of_dictionaries_into_database_tables 

22from datetime import datetime, date 

23from fundamentals import times 

24import time 

25 

26def add_htm_ids_to_mysql_database_table( 

27 raColName, 

28 declColName, 

29 tableName, 

30 dbConn, 

31 log, 

32 primaryIdColumnName="primaryId", 

33 cartesian=False, 

34 batchSize=50000, 

35 reindex=False, 

36 dbSettings=False): 

37 """*Given a database connection, a name of a table and the column names for RA and DEC, generates ID for one or more HTM level in the table* 

38 

39 **Key Arguments** 

40 

41 - ``raColName`` -- ra in sexegesimal 

42 - ``declColName`` -- dec in sexegesimal 

43 - ``tableName`` -- name of table to add htmid info to 

44 - ``dbConn`` -- database hosting the above table 

45 - ``log`` -- logger 

46 - ``primaryIdColumnName`` -- the primary id for the table 

47 - ``cartesian`` -- add cartesian columns. Default *False* 

48 - ``batchSize`` -- the size of the batches of rows to add HTMIds to concurrently. Default *2500* 

49 - ``reindex`` -- reindex the entire table 

50 - ``dbSettings`` -- yaml settings for database 

51  

52 

53 **Return** 

54 

55 - None 

56  

57 

58 **Usage** 

59 

60 ```python 

61 from HMpTy.mysql import add_htm_ids_to_mysql_database_table 

62 add_htm_ids_to_mysql_database_table( 

63 raColName="raDeg", 

64 declColName="decDeg", 

65 tableName="my_big_star_table", 

66 dbConn=dbConn, 

67 log=log, 

68 primaryIdColumnName="primaryId", 

69 reindex=False 

70 ) 

71 ``` 

72  

73 """ 

74 log.debug('starting the ``add_htm_ids_to_mysql_database_table`` function') 

75 

76 # TEST TABLE EXIST 

77 sqlQuery = """show tables""" 

78 rows = readquery( 

79 log=log, 

80 sqlQuery=sqlQuery, 

81 dbConn=dbConn 

82 ) 

83 

84 log.debug( 

85 """Checking the table %(tableName)s exists in the database""" % locals()) 

86 tableList = [] 

87 for row in rows: 

88 tableList.append(list(row.values())[0].lower()) 

89 if tableName.lower() not in tableList: 

90 message = "The %s table does not exist in the database" % (tableName,) 

91 log.critical(message) 

92 raise IOError(message) 

93 

94 log.debug( 

95 """Checking the RA and DEC columns exist in the %(tableName)s table""" % locals()) 

96 # TEST COLUMNS EXISTS 

97 cursor = dbConn.cursor(ms.cursors.DictCursor) 

98 sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,) 

99 cursor.execute(sqlQuery) 

100 rows = cursor.fetchall() 

101 desc = cursor.description 

102 existingColumns = [] 

103 for i in range(len(desc)): 

104 existingColumns.append(desc[i][0]) 

105 if (raColName not in existingColumns) or (declColName not in existingColumns): 

106 message = 'Please make sure you have got the naes of the RA and DEC columns correct' 

107 log.critical(message) 

108 raise IOError(message) 

109 

110 if cartesian: 

111 # ACTION(S) ## 

112 htmCols = { 

113 'htm16ID': 'BIGINT(20)', 

114 'htm13ID': 'INT', 

115 'htm10ID': 'INT', 

116 'cx': 'DOUBLE', 

117 'cy': 'DOUBLE', 

118 'cz': 'DOUBLE' 

119 } 

120 else: 

121 htmCols = { 

122 'htm16ID': 'BIGINT(20)', 

123 'htm13ID': 'INT', 

124 'htm10ID': 'INT' 

125 } 

126 

127 # CHECK IF COLUMNS EXISTS YET - IF NOT CREATE FROM 

128 for key in list(htmCols.keys()): 

129 try: 

130 log.debug( 

131 'attempting to check and generate the HTMId columns for the %s db table' % 

132 (tableName, )) 

133 colExists = \ 

134 """SELECT * 

135 FROM information_schema.COLUMNS 

136 WHERE TABLE_SCHEMA=DATABASE() 

137 AND COLUMN_NAME='%s' 

138 AND TABLE_NAME='%s'""" \ 

139 % (key, tableName) 

140 colExists = readquery( 

141 log=log, 

142 sqlQuery=colExists, 

143 dbConn=dbConn 

144 ) 

145 switch = 0 

146 if not colExists: 

147 if switch == 0: 

148 print("Adding the HTMCircle columns to %(tableName)s" % locals()) 

149 switch = 1 

150 sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \ 

151 key + ' ' + htmCols[key] + ' DEFAULT NULL' 

152 writequery( 

153 log=log, 

154 sqlQuery=sqlQuery, 

155 dbConn=dbConn, 

156 ) 

157 except Exception as e: 

158 log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' 

159 % (tableName, str(e))) 

160 raise e 

161 

162 log.debug( 

163 """Counting the number of rows still requiring HTMID information""" % locals()) 

164 if reindex: 

165 sqlQuery = u""" 

166 SELECT INDEX_NAME FROM INFORMATION_SCHEMA.STATISTICS 

167 WHERE table_schema=DATABASE() AND table_name='%(tableName)s' and COLUMN_NAME = "%(primaryIdColumnName)s"; 

168 """ % locals() 

169 keyname = readquery( 

170 log=log, 

171 sqlQuery=sqlQuery, 

172 dbConn=dbConn 

173 )[0]["INDEX_NAME"] 

174 if keyname != "PRIMARY": 

175 log.error('To reindex the entire table the primaryID you submit must be unique. "%(primaryIdColumnName)s" is not unique in table "%(tableName)s"' % locals()) 

176 return 

177 

178 sqlQuery = """ALTER TABLE `%(tableName)s` disable keys""" % locals() 

179 writequery( 

180 log=log, 

181 sqlQuery=sqlQuery, 

182 dbConn=dbConn 

183 ) 

184 

185 sqlQuery = """SELECT count(*) as count from `%(tableName)s`""" % locals( 

186 ) 

187 elif cartesian: 

188 # COUNT ROWS WHERE HTMIDs ARE NOT SET 

189 sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL or cx is null and %(raColName)s is not null""" % locals( 

190 ) 

191 else: 

192 # COUNT ROWS WHERE HTMIDs ARE NOT SET 

193 sqlQuery = """SELECT count(*) as count from `%(tableName)s` where htm10ID is NULL and %(raColName)s is not null""" % locals( 

194 ) 

195 log.debug( 

196 """SQLQUERY:\n\n%(sqlQuery)s\n\n""" % locals()) 

197 rowCount = readquery( 

198 log=log, 

199 sqlQuery=sqlQuery, 

200 dbConn=dbConn, 

201 quiet=False 

202 ) 

203 totalCount = rowCount[0]["count"] 

204 

205 # ADD HTMIDs IN BATCHES 

206 total = totalCount 

207 batches = int(old_div(total, batchSize)) 

208 

209 count = 0 

210 lastId = False 

211 # NOW GENERATE THE HTMLIds FOR THESE ROWS 

212 for i in range(batches + 1): 

213 if total == 0: 

214 continue 

215 count += batchSize 

216 if count > batchSize: 

217 # Cursor up one line and clear line 

218 sys.stdout.write("\x1b[1A\x1b[2K") 

219 if count > totalCount: 

220 count = totalCount 

221 

222 start = time.time() 

223 

224 log.debug( 

225 """Selecting the next %(batchSize)s rows requiring HTMID information in the %(tableName)s table""" % locals()) 

226 if reindex: 

227 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET 

228 if lastId: 

229 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` > '%s' order by `%s` limit %s""" % ( 

230 primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, lastId, primaryIdColumnName, batchSize) 

231 else: 

232 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` order by `%s` limit %s""" % ( 

233 primaryIdColumnName, raColName, declColName, tableName, primaryIdColumnName, batchSize) 

234 elif cartesian: 

235 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET 

236 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and ((htm10ID is NULL or cx is null)) limit %s""" % ( 

237 primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) 

238 else: 

239 # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET 

240 sqlQuery = """SELECT `%s`, `%s`, `%s` from `%s` where `%s` is not null and `%s` >= 0 and htm10ID is NULL limit %s""" % ( 

241 primaryIdColumnName, raColName, declColName, tableName, raColName, raColName, batchSize) 

242 batch = readquery( 

243 log=log, 

244 sqlQuery=sqlQuery, 

245 dbConn=dbConn 

246 ) 

247 if reindex and len(batch): 

248 lastId = batch[-1][primaryIdColumnName] 

249 log.debug( 

250 """The next %(batchSize)s rows requiring HTMID information have now been selected""" % locals()) 

251 

252 raList = [] 

253 decList = [] 

254 pIdList = [] 

255 raList[:] = [r[raColName] for r in batch] 

256 decList[:] = [r[declColName] for r in batch] 

257 pIdList[:] = [r[primaryIdColumnName] for r in batch] 

258 

259 from HMpTy import htm 

260 mesh16 = htm.HTM(16) 

261 mesh13 = htm.HTM(13) 

262 mesh10 = htm.HTM(10) 

263 

264 log.debug( 

265 'calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) 

266 htm16Ids = mesh16.lookup_id(raList, decList) 

267 htm13Ids = mesh13.lookup_id(raList, decList) 

268 htm10Ids = mesh10.lookup_id(raList, decList) 

269 log.debug( 

270 'finshed calculating htmIds for batch of %s rows in %s db table' % (batchSize, tableName, )) 

271 

272 if cartesian: 

273 log.debug( 

274 'calculating cartesian coordinates for batch of %s rows in %s db table' % (batchSize, tableName, )) 

275 cx = [] 

276 cy = [] 

277 cz = [] 

278 for r, d in zip(raList, decList): 

279 r = math.radians(r) 

280 d = math.radians(d) 

281 cos_dec = math.cos(d) 

282 cx.append(math.cos(r) * cos_dec) 

283 cy.append(math.sin(r) * cos_dec) 

284 cz.append(math.sin(d)) 

285 

286 updates = [] 

287 updates[:] = [{"htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid, "cx": float(ccx), "cy": float(ccy), "cz": float(ccz)} for h16, 

288 h13, h10, pid, ccx, ccy, ccz in zip(htm16Ids, htm13Ids, htm10Ids, pIdList, cx, cy, cz)] 

289 

290 log.debug( 

291 'finished calculating cartesian coordinates for batch of %s rows in %s db table' % ( 

292 batchSize, tableName, )) 

293 else: 

294 log.debug('building the sqlquery') 

295 updates = [] 

296 # updates[:] = ["UPDATE `%(tableName)s` SET htm16ID=%(h16)s, htm13ID=%(h13)s, htm10ID=%(h10)s where %(primaryIdColumnName)s = '%(pid)s';" % locals() for h16, 

297 # h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)] 

298 updates[:] = [{"htm16ID": int(h16), "htm13ID": int(h13), "htm10ID": int(h10), primaryIdColumnName: pid} for h16, 

299 h13, h10, pid in zip(htm16Ids, htm13Ids, htm10Ids, pIdList)] 

300 log.debug('finshed building the sqlquery') 

301 

302 if len(updates): 

303 log.debug( 

304 'starting to update the HTMIds for new objects in the %s db table' % (tableName, )) 

305 

306 # USE dbSettings & dbConn TO ACTIVATE MULTIPROCESSING 

307 insert_list_of_dictionaries_into_database_tables( 

308 dbConn=dbConn, 

309 log=log, 

310 dictList=updates, 

311 dbTableName=tableName, 

312 uniqueKeyList=[], 

313 dateModified=False, 

314 batchSize=20000, 

315 replace=True, 

316 dbSettings=dbSettings, 

317 dateCreated=False 

318 ) 

319 

320 # writequery( 

321 # log=log, 

322 # sqlQuery=sqlQuery, 

323 # dbConn=dbConn, 

324 # ) 

325 log.debug( 

326 'finished updating the HTMIds for new objects in the %s db table' % (tableName, )) 

327 else: 

328 log.debug( 

329 'no HTMIds to add to the %s db table' % (tableName, )) 

330 

331 percent = float(count) * 100. / float(totalCount) 

332 print("%(count)s / %(totalCount)s htmIds added to %(tableName)s (%(percent)0.5f%% complete)" % locals()) 

333 end = time.time() 

334 timediff = end - start 

335 timediff = timediff * 1000000. / float(batchSize) 

336 print("Update speed: %(timediff)0.2fs/1e6 rows\n" % locals()) 

337 

338 # APPLY INDEXES IF NEEDED 

339 sqlQuery = "" 

340 for index in ["htm10ID", "htm13ID", "htm16ID"]: 

341 log.debug('adding %(index)s index to %(tableName)s' % locals()) 

342 iname = "idx_" + index 

343 asqlQuery = u""" 

344 SELECT COUNT(1) IndexIsThere FROM INFORMATION_SCHEMA.STATISTICS 

345 WHERE table_schema=DATABASE() AND table_name='%(tableName)s' AND index_name='%(iname)s'; 

346 """ % locals() 

347 count = readquery( 

348 log=log, 

349 sqlQuery=asqlQuery, 

350 dbConn=dbConn 

351 )[0]["IndexIsThere"] 

352 

353 if count == 0: 

354 if not len(sqlQuery): 

355 sqlQuery += u""" 

356 ALTER TABLE %(tableName)s ADD INDEX `%(iname)s` (`%(index)s` ASC) 

357 """ % locals() 

358 else: 

359 sqlQuery += u""", ADD INDEX `%(iname)s` (`%(index)s` ASC)""" % locals() 

360 if len(sqlQuery): 

361 writequery( 

362 log=log, 

363 sqlQuery=sqlQuery + ";", 

364 dbConn=dbConn, 

365 ) 

366 log.debug('finished adding indexes to %(tableName)s' % locals()) 

367 

368 if reindex: 

369 print("Re-enabling keys within the '%(tableName)s' table" % locals()) 

370 sqlQuery = """ALTER TABLE `%(tableName)s` enable keys""" % locals() 

371 writequery( 

372 log=log, 

373 sqlQuery=sqlQuery, 

374 dbConn=dbConn 

375 ) 

376 

377 print("All HTMIds added to %(tableName)s" % locals()) 

378 

379 log.debug('completed the ``add_htm_ids_to_mysql_database_table`` function') 

380 return None