Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*The base importer for sherlock catalogue imports* 

5 

6:Author: 

7 David Young 

8 

9.. todo :: 

10 

11 - document this module 

12""" 

13from __future__ import print_function 

14from builtins import str 

15from builtins import object 

16import sys 

17import os 

18os.environ['TERM'] = 'vt100' 

19import readline 

20import glob 

21import pickle 

22import codecs 

23import re 

24import string 

25from sherlock.database_cleaner import database_cleaner 

26from datetime import datetime, date, time 

27from docopt import docopt 

28from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables, directory_script_runner, writequery 

29from fundamentals.renderer import list_of_dictionaries 

30from HMpTy.mysql import add_htm_ids_to_mysql_database_table 

31 

class _base_importer(object):
    """
    *The base importer object used to import new catalogues into the sherlock-catalogues database*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``pathToDataFile`` -- path to the file containing the data to import. Default *False* (no file is read and ``catData`` is set to ``None``)
    - ``version`` -- version number of the catalogue to be imported (e.g. DR12)
    - ``catalogueName`` -- name of the catalogue to be imported
    - ``coordinateList`` -- list of coordinates (needed for some streamed tables). Default *None*, which is stored as a new empty list
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*

    **Usage**

    To use this base class to write a new importer, create your class like so:

    ```python
    class newImporter(_base_importer):
        ...
    ```

    """
    # INITIALISATION

    def __init__(
            self,
            log,
            settings=False,
            pathToDataFile=False,
            version=False,
            catalogueName="",
            coordinateList=None,
            radiusArcsec=False
    ):
        self.log = log
        log.debug("instansiating a new '_base_importer' object")
        self.settings = settings
        self.pathToDataFile = pathToDataFile
        self.version = version
        self.catalogueName = catalogueName
        # A FRESH LIST PER INSTANCE - A `[]` DEFAULT IN THE SIGNATURE WOULD BE
        # SHARED BY EVERY IMPORTER CREATED WITHOUT AN EXPLICIT LIST
        self.coordinateList = [] if coordinateList is None else coordinateList
        self.radiusArcsec = radiusArcsec
        self.myPid = str(os.getpid())
        # xt-self-arg-tmpx

        # INITIAL ACTIONS
        # SETUP ALL DATABASE CONNECTIONS
        # (imported here rather than at module level, as in the original)
        from sherlock import database
        db = database(
            log=self.log,
            settings=self.settings
        )
        # ONLY THE CONNECTIONS ARE USED HERE; dbVersions IS DISCARDED
        dbConns, dbVersions = db.connect()
        self.transientsDbConn = dbConns["transients"]
        self.cataloguesDbConn = dbConns["catalogues"]

        # READ THE FILE TO IMPORT THE DATA FROM
        if pathToDataFile:
            self.catData = self._read_data_file(pathToDataFile)
        else:
            self.catData = None

        # GET THE VERSION TO APPEND TO THE DATABASE TABLE NAME FOR THE
        # CATALOGUE
        # NOTE(review): replace("v", "") strips *every* "v" in the version
        # string, not just a leading one - confirm this is intended
        if self.version:
            cleanedVersion = self.version.replace(" ", "")
            cleanedVersion = cleanedVersion.replace("v", "")
            cleanedVersion = cleanedVersion.replace(".", "_")
            self.version = "_v" + cleanedVersion
        else:
            self.version = ""

        # BUILD THE DATABASE TABLE NAME
        self.dbTableName = "tcs_cat_%s%s" % (catalogueName, self.version)

        # SOME DEFAULT OBJECT ATTRIBUTES THAT CAN BE SUPERSEDED
        self.primaryIdColumnName = "primaryId"
        self.databaseInsertbatchSize = 2500
        self.raColName = "raDeg"
        self.declColName = "decDeg"
        self.uniqueKeyList = [self.raColName, self.declColName]

        # DATETIME REGEX - EXPENSIVE OPERATION, LET'S JUST DO IT ONCE
        self.reDatetime = re.compile(r'^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    def _read_data_file(
            self,
            pathToDataFile):
        """*Read and return the entire content of the catalogue data file*

        **Key Arguments**

        - ``pathToDataFile`` -- path to the file containing the data to import

        **Return**

        - the content of the file, as returned by ``read()`` on a ``codecs.open(..., mode='r')`` handle

        **Raises**

        - ``IOError`` -- if the file cannot be opened or read (a critical message is logged first)
        """
        try:
            self.log.debug("attempting to open the file %s" %
                           (pathToDataFile,))
            # THE CONTEXT MANAGER CLOSES THE HANDLE EVEN IF read() FAILS
            with codecs.open(pathToDataFile, mode='r') as readFile:
                return readFile.read()
        except IOError:
            message = 'could not open the file %s' % (pathToDataFile,)
            self.log.critical(message)
            raise IOError(message)

    def add_data_to_database_table(
            self,
            dictList,
            createStatement=False):
        """*Import data in the list of dictionaries in the requested database table*

        Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catalogue was last updated

        **Key Arguments**

        - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported. If empty, nothing is done
        - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*

        **Usage**

        ```python
        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )
        ```

        .. todo ::

            - Write a checklist for creating a new sherlock database importer
        """
        self.log.debug('starting the ``add_data_to_database_table`` method')

        # NOTHING TO IMPORT
        if not dictList:
            return

        dbTableName = self.dbTableName

        # CREATE THE TABLE FIRST IF A CREATE STATEMENT WAS SUPPLIED
        if createStatement:
            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.cataloguesDbConn,
            )

        # NOTE(review): the unique-key list and batch size are hard-coded here;
        # self.uniqueKeyList and self.databaseInsertbatchSize are not consulted
        # - confirm whether that is intentional
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dictList,
            dbTableName=dbTableName,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        self._add_htmids_to_database_table()

        cleaner = database_cleaner(
            log=self.log,
            settings=self.settings
        )
        cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()

        self._update_database_helper_table()

        # REMIND THE USER OF THE MANUAL FOLLOW-UP STEPS
        print("""Now:

- [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
- [ ] retire any previous version of this catlogue in the database. Renaming the catalogue-table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
- [ ] dupliate views from the previous catalogue version to point towards the new version and then delete the old views
- [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
- [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
- [ ] run a test batch of transients to make sure catalogue is installed as expected

""" % {"dbTableName": dbTableName})

        self.log.debug('completed the ``add_data_to_database_table`` method')
        return None

    def _add_htmids_to_database_table(
            self):
        """*Add HTMIDs to database table once all the data has been imported (HTM Levels 10,13,16)*

        Uses the instance's ``raColName``, ``declColName``, ``primaryIdColumnName`` and ``dbTableName`` attributes.

        **Usage**

        ```python
        self._add_htmids_to_database_table()
        ```

        """
        self.log.debug('starting the ``add_htmids_to_database_table`` method')

        tableName = self.dbTableName

        self.log.info("Adding HTMIds to %(tableName)s" %
                      {"tableName": tableName})

        add_htm_ids_to_mysql_database_table(
            raColName=self.raColName,
            declColName=self.declColName,
            tableName=tableName,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName=self.primaryIdColumnName,
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``add_htmids_to_database_table`` method')
        return None

    def _update_database_helper_table(
            self):
        """*Update the sherlock catalogues database helper table with the time-stamp of when this catalogue was last updated*

        Sets ``last_updated = now()`` on the ``tcs_helper_catalogue_tables_info`` row whose ``table_name`` matches this importer's ``dbTableName``.

        **Usage**

        ```python
        self._update_database_helper_table()
        ```

        """
        self.log.debug('starting the ``_update_database_helper_table`` method')

        tableName = self.dbTableName

        # NOTE(review): the table name is interpolated directly into the SQL;
        # it is built from catalogueName/version in __init__ - presumably
        # trusted input, verify against callers
        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s";
        """ % {"tableName": tableName}

        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_update_database_helper_table`` method')
        return None

    # use the tab-trigger below for new method
    # xt-class-method