Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*The base importer for sherlock catalogue imports*
6:Author:
7 David Young
9.. todo ::
11 - document this module
12"""
13from __future__ import print_function
14from builtins import str
15from builtins import object
16import sys
17import os
18os.environ['TERM'] = 'vt100'
19import readline
20import glob
21import pickle
22import codecs
23import re
24import string
25from sherlock.database_cleaner import database_cleaner
26from datetime import datetime, date, time
27from docopt import docopt
28from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables, directory_script_runner, writequery
29from fundamentals.renderer import list_of_dictionaries
30from HMpTy.mysql import add_htm_ids_to_mysql_database_table
class _base_importer(object):
    """
    *The base importer object used to import new catalogues into sherlock-catalogues database*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``pathToDataFile`` -- path to the file containing the data to import
    - ``version`` -- version number of the catalogue to be imported (e.g. DR12)
    - ``catalogueName`` -- name of the catalogue to be imported
    - ``coordinateList`` -- list of coordinates (needed for some streamed tables). Default *None*, which is stored as a new empty list
    - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*

    **Usage**

    To use this base class to write a new importer, create your class like so:

    ```python
    class newImporter(_base_importer):
        ...
    ```

    """
    # INITIALISATION

    def __init__(
            self,
            log,
            settings=False,
            pathToDataFile=False,
            version=False,
            catalogueName="",
            coordinateList=None,
            radiusArcsec=False
    ):
        """*Store the arguments, connect to the databases, read the data file and derive the table name*

        Raises ``IOError`` if ``pathToDataFile`` is given but cannot be opened or read.
        """
        self.log = log
        log.debug("instansiating a new '_base_importer' object")
        self.settings = settings
        self.pathToDataFile = pathToDataFile
        self.version = version
        self.catalogueName = catalogueName
        # A LIST LITERAL AS THE DEFAULT WOULD BE SHARED BY EVERY INSTANCE
        # THAT OMITS THE ARGUMENT, SO BUILD A FRESH ONE PER INSTANCE
        self.coordinateList = [] if coordinateList is None else coordinateList
        self.radiusArcsec = radiusArcsec
        self.myPid = str(os.getpid())
        # xt-self-arg-tmpx

        # INITIAL ACTIONS
        # SETUP ALL DATABASE CONNECTIONS
        # (imported here rather than at module level, as in the original)
        from sherlock import database
        db = database(
            log=self.log,
            settings=self.settings
        )
        # ``connect`` RETURNS A (CONNECTIONS, VERSIONS) PAIR; ONLY THE
        # CONNECTIONS ARE USED BY THIS CLASS
        dbConns, _ = db.connect()
        self.transientsDbConn = dbConns["transients"]
        self.cataloguesDbConn = dbConns["catalogues"]

        # OPEN THE FILE TO IMPORT THE DATA FROM
        if pathToDataFile:
            try:
                self.log.debug("attempting to open the file %s" %
                               (pathToDataFile,))
                # THE CONTEXT MANAGER CLOSES THE HANDLE EVEN IF ``read`` FAILS
                with codecs.open(pathToDataFile, mode='r') as readFile:
                    self.catData = readFile.read()
            except IOError:
                message = 'could not open the file %s' % (pathToDataFile,)
                self.log.critical(message)
                raise IOError(message)
        else:
            self.catData = None

        # GET THE VERSION TO APPEND TO THE DATABASE TABLE NAME FOR THE
        # CATALOGUE
        # NOTE(review): EVERY "v" IN THE VERSION STRING IS REMOVED, NOT ONLY
        # A LEADING ONE - CONFIRM THIS IS INTENDED
        if self.version:
            cleanedVersion = self.version.replace(" ", "")
            cleanedVersion = cleanedVersion.replace("v", "")
            cleanedVersion = cleanedVersion.replace(".", "_")
            self.version = "_v" + cleanedVersion
        else:
            self.version = ""

        # BUILD THE DATABASE TABLE NAME
        self.dbTableName = "tcs_cat_%(catalogueName)s%(version)s" % {
            "catalogueName": catalogueName,
            "version": self.version
        }

        # SOME DEFAULT OBJECT ATTRIBUTES THAT CAN BE SUPERSEDED
        self.primaryIdColumnName = "primaryId"
        self.databaseInsertbatchSize = 2500
        self.raColName = "raDeg"
        self.declColName = "decDeg"
        self.uniqueKeyList = [self.raColName, self.declColName]

        # DATETIME REGEX - EXPENSIVE OPERATION, LET'S JUST DO IT ONCE
        self.reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')

    def add_data_to_database_table(
            self,
            dictList,
            createStatement=False):
        """*Import data in the list of dictionaries in the requested database table*

        Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catalogue was last updated. Finally prints a checklist of manual follow-up steps.

        **Key Arguments**

        - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported. If empty (or *None*) the method returns immediately without touching the database
        - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*

        **Usage**

        ```python
        self.add_data_to_database_table(
            dictList=dictList,
            createStatement=createStatement
        )
        ```

        .. todo ::

            - Write a checklist for creating a new sherlock database importer
        """
        self.log.debug('starting the ``add_data_to_database_table`` method')

        # NOTHING TO IMPORT
        if not dictList:
            return None

        dbTableName = self.dbTableName

        if createStatement:
            writequery(
                log=self.log,
                sqlQuery=createStatement,
                dbConn=self.cataloguesDbConn,
            )

        # NOTE(review): ``self.uniqueKeyList`` AND
        # ``self.databaseInsertbatchSize`` ARE SET IN ``__init__`` BUT THE
        # VALUES BELOW ARE HARD-CODED - CONFIRM WHICH IS INTENDED
        insert_list_of_dictionaries_into_database_tables(
            dbConn=self.cataloguesDbConn,
            log=self.log,
            dictList=dictList,
            dbTableName=dbTableName,
            uniqueKeyList=[],
            dateModified=True,
            dateCreated=True,
            batchSize=10000,
            replace=True,
            dbSettings=self.settings["database settings"][
                "static catalogues"]
        )

        self._add_htmids_to_database_table()

        cleaner = database_cleaner(
            log=self.log,
            settings=self.settings
        )
        cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()

        self._update_database_helper_table()

        print("""Now:

- [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
- [ ] retire any previous version of this catlogue in the database. Renaming the catalogue-table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
- [ ] dupliate views from the previous catalogue version to point towards the new version and then delete the old views
- [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
- [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
- [ ] run a test batch of transients to make sure catalogue is installed as expected

""" % {"dbTableName": dbTableName})

        self.log.debug('completed the ``add_data_to_database_table`` method')
        return None

    def _add_htmids_to_database_table(
            self):
        """*Add HTMIDs to database table once all the data has been imported*

        The RA/Dec column names, primary ID column name and table name are taken from the instance attributes. The original author notes HTM levels 10, 13 and 16; the levels are not set in this class.

        **Usage**

        ```python
        self._add_htmids_to_database_table()
        ```

        """
        self.log.debug('starting the ``add_htmids_to_database_table`` method')

        tableName = self.dbTableName

        self.log.info("Adding HTMIds to %(tableName)s" %
                      {"tableName": tableName})

        add_htm_ids_to_mysql_database_table(
            raColName=self.raColName,
            declColName=self.declColName,
            tableName=tableName,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            primaryIdColumnName=self.primaryIdColumnName,
            dbSettings=self.settings["database settings"]["static catalogues"]
        )

        self.log.debug('completed the ``add_htmids_to_database_table`` method')
        return None

    def _update_database_helper_table(
            self):
        """*Update the sherlock catalogues database helper table with the time-stamp of when this catalogue was last updated*

        Sets ``last_updated = now()`` on the ``tcs_helper_catalogue_tables_info`` row whose ``table_name`` matches this importer's table.

        **Usage**

        ```python
        self._update_database_helper_table()
        ```

        """
        self.log.debug('starting the ``_update_database_helper_table`` method')

        # THE TABLE NAME IS BUILT IN ``__init__`` FROM THE CATALOGUE NAME AND
        # VERSION AND IS INTERPOLATED DIRECTLY INTO THE QUERY
        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s";
        """ % {"tableName": self.dbTableName}

        writequery(
            log=self.log,
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
        )

        self.log.debug(
            'completed the ``_update_database_helper_table`` method')
        return None

    # use the tab-trigger below for new method
    # xt-class-method