Coverage for sherlock/imports/_base_importer.py: 95%
91 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-10-10 13:58 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-10-10 13:58 +0000
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*The base importer for sherlock catalogue imports*
6:Author:
7 David Young
9.. todo ::
11 - document this module
12"""
13from __future__ import print_function
14from builtins import str
15from builtins import object
16import sys
17import os
18os.environ['TERM'] = 'vt100'
19import readline
20import glob
21import pickle
22import codecs
23import re
24import string
25from sherlock.database_cleaner import database_cleaner
26from datetime import datetime, date, time
27from docopt import docopt
28from fundamentals.mysql import insert_list_of_dictionaries_into_database_tables, directory_script_runner, writequery
29from fundamentals.renderer import list_of_dictionaries
30from HMpTy.mysql import add_htm_ids_to_mysql_database_table
33class _base_importer(object):
34 """
35 *The base importer object used to import new catalgues into sherlock-catalogues database*
37 **Key Arguments**
39 - ``log`` -- logger
40 - ``settings`` -- the settings dictionary
41 - ``pathToDataFIle`` -- path to the file containing the data to import
42 - ``version`` -- version number of the catalogue to be imported (e.g. DR12)
43 - ``catalogueName`` -- name of the catalogue to be imported
44 - ``coordinateList`` -- list of coordinates (needed for some streamed tables)
45 - ``radiusArcsec`` -- the radius in arcsec with which to perform the initial NED conesearch. Default *False*
48 **Usage**
50 To use this base class to write a new importer, create your class like so:
52 ```python
53 class newImporter(_base_importer):
54 ...
55 ```
57 """
58 # INITIALISATION
60 def __init__(
61 self,
62 log,
63 settings=False,
64 pathToDataFile=False,
65 version=False,
66 catalogueName="",
67 coordinateList=[],
68 radiusArcsec=False
69 ):
70 self.log = log
71 log.debug("instansiating a new '_base_importer' object")
72 self.settings = settings
73 self.pathToDataFile = pathToDataFile
74 self.version = version
75 self.catalogueName = catalogueName
76 self.coordinateList = coordinateList
77 self.radiusArcsec = radiusArcsec
78 self.myPid = str(os.getpid())
79 # xt-self-arg-tmpx
81 # INITIAL ACTIONS
82 # SETUP DATABASE CONNECTIONS
83 # SETUP ALL DATABASE CONNECTIONS
84 from sherlock import database
85 db = database(
86 log=self.log,
87 settings=self.settings
88 )
89 dbConns, dbVersions = db.connect()
90 self.transientsDbConn = dbConns["transients"]
91 self.cataloguesDbConn = dbConns["catalogues"]
93 # OPEN THE FILE TO IMPORT THE DATA FROM
94 if pathToDataFile:
95 pathToReadFile = pathToDataFile
96 try:
97 self.log.debug("attempting to open the file %s" %
98 (pathToReadFile,))
99 readFile = codecs.open(pathToReadFile, mode='r')
100 self.catData = readFile.read()
101 readFile.close()
102 except IOError as e:
103 message = 'could not open the file %s' % (pathToReadFile,)
104 self.log.critical(message)
105 raise IOError(message)
106 readFile.close()
107 else:
108 self.catData = None
110 # GET THE VERSION TO APPEND TO THE DATABASE TABLE NAME FOR THE
111 # CATALOGUE
112 if self.version:
113 self.version = "_v" + \
114 self.version.replace(" ", "").replace(
115 "v", "").replace(".", "_")
116 else:
117 self.version = ""
118 version = self.version
120 # BUILD THE DATABASE TABLE NAME
121 self.dbTableName = "tcs_cat_%(catalogueName)s%(version)s" % locals()
123 # SOME DEFAULT OBJECT ATTRIBUTES THAT CAN BE SUPERSEDED
124 self.primaryIdColumnName = "primaryId"
125 self.databaseInsertbatchSize = 2500
126 self.raColName = "raDeg"
127 self.declColName = "decDeg"
128 self.uniqueKeyList = [self.raColName, "decDeg"]
130 # DATETIME REGEX - EXPENSIVE OPERATION, LET"S JUST DO IT ONCE
131 self.reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
133 return None
135 def add_data_to_database_table(
136 self,
137 dictList,
138 createStatement=False):
139 """*Import data in the list of dictionaries in the requested database table*
141 Also adds HTMIDs and updates the sherlock-catalogue database helper table with the time-stamp of when the imported catlogue was last updated
143 **Key Arguments**
145 - ``dictList`` - a list of dictionaries containing all the rows in the catalogue to be imported
146 - ``createStatement`` - the table's mysql create statement (used to generate table if it does not yet exist in database). Default *False*
149 **Usage**
151 ```python
152 self.add_data_to_database_table(
153 dictList=dictList,
154 createStatement=createStatement
155 )
156 ```
159 .. todo ::
161 - Write a checklist for creating a new sherlock database importer
162 """
163 self.log.debug('starting the ``add_data_to_database_table`` method')
165 if len(dictList) == 0:
166 return
168 myPid = self.myPid
169 dbTableName = self.dbTableName
171 if createStatement:
172 writequery(
173 log=self.log,
174 sqlQuery=createStatement,
175 dbConn=self.cataloguesDbConn,
176 )
178 insert_list_of_dictionaries_into_database_tables(
179 dbConn=self.cataloguesDbConn,
180 log=self.log,
181 dictList=dictList,
182 dbTableName=dbTableName,
183 uniqueKeyList=[],
184 dateModified=True,
185 dateCreated=True,
186 batchSize=10000,
187 replace=True,
188 dbSettings=self.settings["database settings"][
189 "static catalogues"]
190 )
192 self._add_htmids_to_database_table()
194 cleaner = database_cleaner(
195 log=self.log,
196 settings=self.settings
197 )
198 cleaner._update_tcs_helper_catalogue_tables_info_with_new_tables()
200 self._update_database_helper_table()
202 if "ned_stream" not in dbTableName:
203 print("""Now:
205 - [ ] edit the `%(dbTableName)s` row in the sherlock catalogues database adding relevant column mappings, catalogue version number etc
206 - [ ] retire any previous version of this catlogue in the database. Renaming the catalogue-table by appending `legacy_` and also change the name in the `tcs_helper_catalogue_tables_info` table
207 - [ ] dupliate views from the previous catalogue version to point towards the new version and then delete the old views
208 - [ ] run the command `sherlock clean [-s <pathToSettingsFile>]` to clean up helper tables
209 - [ ] switch out the old catalogue table/views in your sherlock search algorithms in the yaml settings files
210 - [ ] run a test batch of transients to make sure catalogue is installed as expected
212 """ % locals())
214 self.log.debug('completed the ``add_data_to_database_table`` method')
215 return None
217 def _add_htmids_to_database_table(
218 self):
219 """*Add HTMIDs to database table once all the data has been imported (HTM Levels 10,13,16)*
221 **Usage**
223 ```python
224 self._add_htmids_to_database_table()
225 ```
227 """
228 self.log.debug('starting the ``add_htmids_to_database_table`` method')
230 tableName = self.dbTableName
232 self.log.info("Adding HTMIds to %(tableName)s" % locals())
234 add_htm_ids_to_mysql_database_table(
235 raColName=self.raColName,
236 declColName=self.declColName,
237 tableName=self.dbTableName,
238 dbConn=self.cataloguesDbConn,
239 log=self.log,
240 primaryIdColumnName=self.primaryIdColumnName,
241 dbSettings=self.settings["database settings"]["static catalogues"]
242 )
244 self.log.debug('completed the ``add_htmids_to_database_table`` method')
245 return None
247 def _update_database_helper_table(
248 self):
249 """*Update the sherlock catalogues database helper table with the time-stamp of when this catlogue was last updated*
251 **Usage**
253 ```python
254 self._update_database_helper_table()
255 ```
257 """
258 self.log.debug('starting the ``_update_database_helper_table`` method')
260 tableName = self.dbTableName
262 sqlQuery = u"""
263 update tcs_helper_catalogue_tables_info set last_updated = now() where table_name = "%(tableName)s";
264 """ % locals()
266 writequery(
267 log=self.log,
268 sqlQuery=sqlQuery,
269 dbConn=self.cataloguesDbConn,
270 )
272 self.log.debug(
273 'completed the ``_update_database_helper_table`` method')
274 return None
276 # use the tab-trigger below for new method
277 # xt-class-method