Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Import ned stream into sherlock-catalogues database* 

5 

6:Author: 

7 David Young 

8""" 

9from __future__ import print_function 

10from __future__ import division 

11from past.utils import old_div 

12import sys 

13import os 

14os.environ['TERM'] = 'vt100' 

15import readline 

16import glob 

17import pickle 

18import codecs 

19import string 

20import re 

21from datetime import datetime, date, time 

22from docopt import docopt 

23from neddy import namesearch, conesearch 

24from HMpTy.mysql import add_htm_ids_to_mysql_database_table 

25from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables 

26from astrocalc.coords import unit_conversion 

27from fundamentals.renderer import list_of_dictionaries 

28from fundamentals.mysql import directory_script_runner, readquery, writequery 

29from ._base_importer import _base_importer 

30 

31 

class ned(_base_importer):
    """
    *Using a list of coordinates, query the online* `NED <https://ned.ipac.caltech.edu/>`_ *database and import sources found within a given search radius of each of the locations into the sherlock-catalogues database*

    The code:

    1. Uses the list of transient coordinates and queries NED (conesearch) for the results within the given search radius
    2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
    3. Adds the resulting matched NED IDs/Names to the `tcs_cat_ned_stream` table
    4. Updates the NED query history table
    5. Queries NED via NED IDs (object search) for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

    Note it's up to the user to filter the input coordinate list by checking whether or not the same area of the sky has been imported into the `tcs_cat_ned_stream` table recently (by checking the `tcs_helper_ned_query_history` table)

    **Key Arguments**

    - ``dbConn`` -- mysql database connection
    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``coordinateList`` -- list of coordinates (a list of strings with RA and DEC space separated)
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*


    **Usage**

    To import the ned catalogue stream, run the following:


    ```python
    from sherlock.imports import ned

    stream = ned(
        log=log,
        settings=settings,
        coordinateList=["23.12323 -12.34343","345.43234 45.26789"],
        radiusArcsec=180
    )
    stream.ingest()
    ```

    .. todo ::

        - test this code is still working after changes
        - add option to filter coordinate list via the `tcs_helper_ned_query_history` table
        - check sublime snippet exists
        - clip any useful text to docs mindmap
    """
    # INITIALISATION
    # NO ``__init__`` IS DEFINED HERE; CONSTRUCTION IS INHERITED FROM
    # ``_base_importer``

    # MAXIMUM NUMBER OF NED NAMES SENT TO NED IN A SINGLE NAMESEARCH BATCH.
    # SHARED BY THE SELECT AND COUNT QUERIES SO THEY CANNOT DRIFT APART
    _metadataBatchSize = 50000

    # COLUMNS OF THE NED STREAM TABLE FILLED IN FROM THE NAMESEARCH RESULTS
    _metadataColumns = (
        "redshift_quality", "redshift", "hierarchy", "object_type",
        "major_diameter_arcmin", "morphology", "magnitude_filter",
        "ned_notes", "eb_v", "raDeg", "radio_morphology", "activity_type",
        "minor_diameter_arcmin", "decDeg", "redshift_err", "ned_name"
    )

    def ingest(self):
        """*Perform conesearches of the online NED database and import the results into the sherlock-database*

        The code:

        1. uses the list of transient coordinates and queries NED for the results within the given search radius
        2. Creates the `tcs_cat_ned_stream` table if it doesn't exist
        3. Adds the resulting NED IDs/Names to the `tcs_cat_ned_stream` table
        4. Updates the NED query history table
        5. Queries NED via NED IDs for the remaining source metadata to be added to the `tcs_cat_ned_stream` table

        **Usage**

        Having setup the NED object with a coordinate list and cone-search radius, run the `ingest()` method

        ```python
        stream.ingest()
        ```


        .. todo ::

            - check sublime snippet exists
            - clip any useful text to docs mindmap
            - regenerate the docs and check rendering of this docstring
        """
        self.log.debug('starting the ``ingest`` method')

        if not self.radiusArcsec:
            self.log.error(
                'please give a radius in arcsec with which to preform the initial NED conesearch')
            # A MISSING RADIUS IS A FATAL USAGE ERROR - EXIT WITH A NON-ZERO
            # STATUS SO CALLING SCRIPTS CAN DETECT THE FAILURE
            sys.exit(1)

        # VARIABLES
        # SIZE OF NUMBER OF ROWS TO INSERT INTO DATABASE TABLE AT ANY ONE GO
        self.databaseInsertbatchSize = 10000

        # THE DATABASE TABLE TO STREAM THE NED DATA INTO
        self.dbTableName = "tcs_cat_ned_stream"

        dictList = self._create_dictionary_of_ned()

        createStatement = """CREATE TABLE IF NOT EXISTS `%(tableName)s` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
  `ned_name` varchar(150) NOT NULL,
  `redshift` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `major_diameter_arcmin` double DEFAULT NULL,
  `ned_notes` varchar(700) DEFAULT NULL,
  `object_type` varchar(100) DEFAULT NULL,
  `redshift_err` double DEFAULT NULL,
  `redshift_quality` varchar(100) DEFAULT NULL,
  `magnitude_filter` varchar(10) DEFAULT NULL,
  `minor_diameter_arcmin` double DEFAULT NULL,
  `morphology` varchar(50) DEFAULT NULL,
  `hierarchy` varchar(50) DEFAULT NULL,
  `galaxy_morphology` varchar(50) DEFAULT NULL,
  `radio_morphology` varchar(50) DEFAULT NULL,
  `activity_type` varchar(50) DEFAULT NULL,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `eb_v` double DEFAULT NULL,
  `htm16ID` bigint(20) DEFAULT NULL,
  `download_error` tinyint(1) DEFAULT '0',
  `htm10ID` bigint(20) DEFAULT NULL,
  `htm13ID` bigint(20) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  UNIQUE KEY `ned_name` (`ned_name`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `raDeg` (`raDeg`),
  KEY `downloadError` (`download_error`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
""" % {"tableName": self.dbTableName}

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )

        self._update_ned_query_history()
        self._download_ned_source_metadata()

        self.log.debug('completed the ``ingest`` method')
        return None

    def _create_dictionary_of_ned(
            self):
        """*Create a list of dictionaries containing all the object ids (NED names) in the ned stream*

        The names come from a NED conesearch of every coordinate in ``self.coordinateList`` using ``self.radiusArcsec``.

        **Return**

        - ``dictList`` - a list of dictionaries (one ``ned_name`` key each) containing all the object ids (NED names) in the ned stream


        **Usage**

        ```python
        dictList = stream._create_dictionary_of_ned()
        ```

        """
        self.log.debug(
            'starting the ``_create_dictionary_of_ned`` method')

        # GET THE NAMES (UNIQUE IDS) OF THE SOURCES WITHIN THE CONESEARCH FROM
        # NED (THE SEARCH PARAMETERS RETURNED ALONGSIDE ARE NOT NEEDED HERE)
        names, searchParams = conesearch(
            log=self.log,
            radiusArcsec=self.radiusArcsec,
            nearestOnly=False,
            unclassified=True,
            quiet=False,
            listOfCoordinates=self.coordinateList,
            outputFilePath=False,
            verbose=False
        ).get_crossmatch_names()

        dictList = [{"ned_name": n} for n in names]

        self.log.debug(
            'completed the ``_create_dictionary_of_ned`` method')
        return dictList

    def _update_ned_query_history(
            self):
        """*Update the database helper table to give details of the ned cone searches performed*

        One row (``raDeg``, ``decDeg``, ``arcsecRadius``) is written to `tcs_helper_ned_query_history` for every coordinate that can be parsed. Coordinates may be strings with RA and DEC separated by whitespace, or tuples/lists with RA and DEC as the first two items. Anything else is logged as a warning and skipped.

        *Usage:*

        ```python
        stream._update_ned_query_history()
        ```
        """
        self.log.debug('starting the ``_update_ned_query_history`` method')

        stringTypes = ("".__class__, u"".__class__)

        # UPDATE THE DATABASE HELPER TABLE TO GIVE DETAILS OF THE NED CONE
        # SEARCHES PERFORMED
        dataList = []
        for coord in self.coordinateList:
            if isinstance(coord, stringTypes):
                # split() WITHOUT AN ARGUMENT TOLERATES REPEATED OR
                # LEADING/TRAILING WHITESPACE
                parts = coord.split()
            elif isinstance(coord, (tuple, list)):
                parts = coord
            else:
                parts = None

            # NEVER FALL THROUGH WITH THE PREVIOUS ITERATION'S RA & DEC
            if parts is None or len(parts) < 2:
                self.log.warning(
                    "Could not parse the coordinate %(coord)r - it will not be added to the NED query history" % {"coord": coord})
                continue

            dataList.append(
                {"raDeg": parts[0],
                 "decDeg": parts[1],
                 "arcsecRadius": self.radiusArcsec}
            )

        if not dataList:
            return None

        # CREATE TABLE IF NOT EXIST
        createStatement = """CREATE TABLE IF NOT EXISTS `tcs_helper_ned_query_history` (
  `primaryId` bigint(20) NOT NULL AUTO_INCREMENT,
  `raDeg` double DEFAULT NULL,
  `decDeg` double DEFAULT NULL,
  `dateCreated` datetime DEFAULT CURRENT_TIMESTAMP,
  `dateLastModified` datetime DEFAULT CURRENT_TIMESTAMP,
  `updated` varchar(45) DEFAULT '0',
  `arcsecRadius` int(11) DEFAULT NULL,
  `dateQueried` datetime DEFAULT CURRENT_TIMESTAMP,
  `htm16ID` bigint(20) DEFAULT NULL,
  `htm13ID` int(11) DEFAULT NULL,
  `htm10ID` int(11) DEFAULT NULL,
  PRIMARY KEY (`primaryId`),
  KEY `idx_htm16ID` (`htm16ID`),
  KEY `dateQueried` (`dateQueried`),
  KEY `dateHtm16` (`dateQueried`,`htm16ID`),
  KEY `idx_htm10ID` (`htm10ID`),
  KEY `idx_htm13ID` (`htm13ID`)
) ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;
        """
        writequery(
            log=self.log,
            sqlQuery=createStatement,
            dbConn=self.cataloguesDbConn
        )

        # USE dbSettings TO ACTIVATE MULTIPROCESSING
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dataList,
            dbTableName="tcs_helper_ned_query_history",
            uniqueKeyList=[],
            dateModified=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        # INDEX THE TABLE FOR LATER SEARCHES
        add_htm_ids_to_mysql_database_table(
            raColName="raDeg",
            declColName="decDeg",
            tableName="tcs_helper_ned_query_history",
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName="primaryId",
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``_update_ned_query_history`` method')
        return None

    def _download_ned_source_metadata(
            self):
        """*Query NED using the names of the NED sources in our local database to retrieve extra metadata*

        Sources are processed in batches; after each batch the table is recounted and the loop continues until no sources requiring metadata remain.

        *Usage:*

        ```python
        stream._download_ned_source_metadata()
        ```
        """
        self.log.debug('starting the ``_download_ned_source_metadata`` method')

        self.dbTableName = "tcs_cat_ned_stream"

        total, batches = self._count_ned_sources_in_database_requiring_metadata()

        self.log.info(
            "%(total)s galaxies require metadata. Need to send %(batches)s batch requests to NED." % {"total": total, "batches": batches})

        thisCount = 0

        # FOR EACH BATCH, GET THE GALAXY IDs, QUERY NED AND UPDATE THE DATABASE
        # THEN RECOUNT TO DETERMINE IF THERE ARE REMAINING SOURCES TO GRAB
        # METADATA FOR
        while self.total:
            thisCount += 1
            if not self._get_ned_sources_needing_metadata():
                # THE COUNT REPORTED SOURCES BUT NONE WERE SELECTED, SO A
                # FURTHER PASS CANNOT MAKE PROGRESS - STOP RATHER THAN LOOP
                # FOREVER
                self.log.warning(
                    "No NED sources were selected although %(total)s still require metadata - stopping" % {"total": self.total})
                break
            self._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
                thisCount)
            self._count_ned_sources_in_database_requiring_metadata()

        self.log.debug(
            'completed the ``_download_ned_source_metadata`` method')
        return None

    def _get_ned_sources_needing_metadata(
            self):
        """*Get the names of 50000 or less NED sources that still require metadata in the database*

        A source requires metadata when its ``raDeg`` is null and it has not been flagged with ``download_error = 1``. The selected names are stored on ``self.theseIds``.

        **Return**

        - ``len(self.theseIds)`` -- the number of NED IDs returned


        *Usage:*

        ```python
        numberSources = stream._get_ned_sources_needing_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_get_ned_sources_needing_metadata`` method')

        # SELECT THE DATA FROM NED TABLE
        sqlQuery = u"""
            select ned_name from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null) limit %(batchSize)s;
        """ % {"tableName": self.dbTableName, "batchSize": self._metadataBatchSize}
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )

        self.theseIds = [r["ned_name"] for r in rows]

        self.log.debug(
            'completed the ``_get_ned_sources_needing_metadata`` method')

        return len(self.theseIds)

    def _do_ned_namesearch_queries_and_add_resulting_metadata_to_database(
            self,
            batchCount):
        """*Query NED via name search and add result metadata to database*

        The names in ``self.theseIds`` are sent to NED, the returned values are cleaned and handed to ``add_data_to_database_table``, and finally every name in the batch is flagged with ``download_error = 1``. Sources whose ``raDeg`` is no longer null are ignored by the selection queries regardless of that flag, so in effect the flag only retires the names for which no usable metadata was obtained (assuming ``add_data_to_database_table`` updates the existing rows -- not visible from this class).

        **Key Arguments**

        - ``batchCount`` - the index number of the batch sent to NED (only needed for printing to STDOUT to give user idea of progress)


        *Usage:*

        ```python
        numberSources = stream._do_ned_namesearch_queries_and_add_resulting_metadata_to_database(batchCount=10)
        ```
        """
        self.log.debug(
            'starting the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')

        # ASTROCALC UNIT CONVERTER OBJECT
        converter = unit_conversion(
            log=self.log
        )
        tableName = self.dbTableName
        stringTypes = ("".__class__, u"".__class__)
        diameterKeys = ("major_diameter_arcmin", "minor_diameter_arcmin")

        # QUERY NED WITH BATCH
        totalCount = len(self.theseIds)
        print("requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % {
              "totalCount": totalCount, "batchCount": batchCount})
        # QUERY THE ONLINE NED DATABASE USING NEDDY'S NAMESEARCH METHOD
        search = namesearch(
            log=self.log,
            names=self.theseIds,
            quiet=True
        )
        results = search.get()
        print("results returned from ned -- starting to add to database")

        # CLEAN THE RETURNED DATA AND UPDATE DATABASE
        totalCount = len(results)
        count = 0
        dictList = []
        for thisDict in results:
            thisDict["tableName"] = tableName
            count += 1
            for k, v in list(thisDict.items()):
                # EMPTY/MISSING VALUES BECOME THE "null" MARKER; STOP HERE SO
                # THE STRING TESTS BELOW NEVER RUN AGAINST ``None``
                if not v:
                    thisDict[k] = "null"
                    continue
                # ONLY STRINGS CAN CARRY FLAG CHARACTERS OR QUOTES
                if not isinstance(v, stringTypes):
                    continue
                cleaned = v
                if k in diameterKeys:
                    # STRIP THE UNCERTAINTY/LIMIT FLAGS FROM THE DIAMETERS
                    for flag in (":", "?", "<"):
                        cleaned = cleaned.replace(flag, "")
                    if not cleaned:
                        # NOTHING BUT FLAGS - TREAT AS A MISSING VALUE
                        thisDict[k] = "null"
                        continue
                if '"' in cleaned:
                    cleaned = cleaned.replace('"', '\\"')
                if cleaned != v:
                    thisDict[k] = cleaned

            # SKIP RESULTS NED COULD NOT RESOLVE OR REPORTED AS DUPLICATES
            inputNote = thisDict["input_note"]
            if "Input name not" in inputNote or "Same object as" in inputNote:
                continue

            try:
                thisDict["raDeg"] = converter.ra_sexegesimal_to_decimal(
                    ra=thisDict["ra"]
                )
                thisDict["decDeg"] = converter.dec_sexegesimal_to_decimal(
                    dec=thisDict["dec"]
                )
            except Exception:
                # BEST EFFORT: LOG AND MOVE ON TO THE NEXT SOURCE, BUT LET
                # KeyboardInterrupt/SystemExit PROPAGATE
                self.log.warning(
                    "Could not convert the RA & DEC for the %(name)s NED source" % {"name": thisDict["input_name"]})
                continue

            thisDict["eb_v"] = thisDict["eb-v"]
            thisDict["ned_name"] = thisDict["input_name"]
            row = {}
            for k in self._metadataColumns:
                row[k] = None if thisDict[k] == "null" else thisDict[k]

            dictList.append(row)

        self.add_data_to_database_table(
            dictList=dictList,
            createStatement="""SET SESSION sql_mode="";"""
        )

        if self.theseIds:
            # ESCAPE BACKSLASHES AND DOUBLE-QUOTES SO A NED NAME CANNOT BREAK
            # OUT OF ITS QUOTED SQL LITERAL
            escapedIds = [
                n.replace("\\", "\\\\").replace('"', '\\"') for n in self.theseIds]
            theseIds = ("\", \"").join(escapedIds)

            sqlQuery = u"""
                update %(tableName)s set download_error = 1 where ned_name in ("%(theseIds)s");
            """ % {"tableName": tableName, "theseIds": theseIds}
            writequery(
                log=self.log,
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
            )

        print("%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % {
              "count": count, "totalCount": totalCount})
        if count < totalCount:
            # Cursor up one line and clear line
            sys.stdout.write("\x1b[1A\x1b[2K")

        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s"
        """ % {"tableName": tableName}
        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_do_ned_namesearch_queries_and_add_resulting_metadata_to_database`` method')
        return None

    def _count_ned_sources_in_database_requiring_metadata(
            self):
        """*Count the sources in the NED table requiring metadata*

        The results are also stored on ``self.total`` and ``self.batches``.

        **Return**

        - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED


        *Usage:*

        ```python
        totalRemaining, numberOfBatches = stream._count_ned_sources_in_database_requiring_metadata()
        ```
        """
        self.log.debug(
            'starting the ``_count_ned_sources_in_database_requiring_metadata`` method')

        sqlQuery = u"""
            select count(*) as count from %(tableName)s where raDeg is null and (download_error != 1 or download_error is null)
        """ % {"tableName": self.dbTableName}
        rows = readquery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            quiet=False
        )
        self.total = rows[0]["count"]

        # CEILING DIVISION: 0 SOURCES -> 0 BATCHES, AND AN EXACT MULTIPLE OF
        # THE BATCH SIZE NO LONGER REPORTS ONE BATCH TOO MANY
        batchSize = self._metadataBatchSize
        self.batches = (self.total + batchSize - 1) // batchSize

        self.log.debug(
            'completed the ``_count_ned_sources_in_database_requiring_metadata`` method')
        return self.total, self.batches

    # use the tab-trigger below for new method
    # xt-class-method