Coverage for fundamentals/mysql/convert_dictionary_to_mysql_table.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Convert a python dictionary into rows of a mysql table*
6:Author:
7 David Young
8"""
9from builtins import zip
10from builtins import str
11from builtins import range
12import sys
13import os
14os.environ['TERM'] = 'vt100'
15import re
16import yaml
17import time
18import datetime
19import collections as c
20from fundamentals import tools, times
21from fundamentals.mysql import writequery, table_exists, readquery
22import six
25def convert_dictionary_to_mysql_table(
26 log,
27 dictionary,
28 dbTableName,
29 uniqueKeyList=[],
30 dbConn=False,
31 createHelperTables=False,
32 dateModified=False,
33 returnInsertOnly=False,
34 replace=False,
35 batchInserts=True,
36 reDatetime=False,
37 skipChecks=False,
38 dateCreated=True):
39 """convert dictionary to mysql table
41 **Key Arguments**
43 - ``log`` -- logger
44 - ``dictionary`` -- python dictionary
45 - ``dbConn`` -- the db connection
46 - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist)
47 - ``uniqueKeyList`` - a lists column names that need combined to create the primary key
48 - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc
49 - ``returnInsertOnly`` -- returns only the insert command (does not execute it)
50 - ``dateModified`` -- add a modification date and updated flag to the mysql table
51 - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required)
52 - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples
55 - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``)
56 - ``skipChecks`` -- skip reliability checks. Less robust but a little faster.
57 - ``dateCreated`` -- add a timestamp for dateCreated?
59 **Return**
61 - ``returnInsertOnly`` -- the insert statement if requested
64 **Usage**
66 To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist:
68 ```python
69 from fundamentals.mysql import convert_dictionary_to_mysql_table
70 dictionary = {"a newKey": "cool", "and another": "super cool",
71 "uniquekey1": "cheese", "uniqueKey2": "burgers"}
73 convert_dictionary_to_mysql_table(
74 dbConn=dbConn,
75 log=log,
76 dictionary=dictionary,
77 dbTableName="testing_table",
78 uniqueKeyList=["uniquekey1", "uniqueKey2"],
79 dateModified=False,
80 returnInsertOnly=False,
81 replace=True
82 )
83 ```
85 Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database:
87 insertCommand, valueTuple = convert_dictionary_to_mysql_table(
88 dbConn=dbConn,
89 log=log,
90 dictionary=dictionary,
91 dbTableName="testing_table",
92 uniqueKeyList=["uniquekey1", "uniqueKey2"],
93 dateModified=False,
94 returnInsertOnly=True,
95 replace=False,
96 batchInserts=True
97 )
99 print(insertCommand, valueTuple)
101 # OUT: 'INSERT IGNORE INTO `testing_table`
102 # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES
103 # (%s, %s, %s, %s, %s)', ('cool', 'super cool',
104 # '2016-06-21T12:08:59', 'burgers', 'cheese')
106 You can also return a list of single insert statements using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found:
108 inserts = convert_dictionary_to_mysql_table(
109 dbConn=dbConn,
110 log=log,
111 dictionary=dictionary,
112 dbTableName="testing_table",
113 uniqueKeyList=["uniquekey1", "uniqueKey2"],
114 dateModified=False,
115 returnInsertOnly=True,
116 replace=True,
117 batchInserts=False
118 )
120 print(inserts)
122 # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1)
123 # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese")
124 # ON DUPLICATE KEY UPDATE a_newKey="cool", and_another="super
125 # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers",
126 # uniquekey1="cheese"
128 """
130 log.debug('starting the ``convert_dictionary_to_mysql_table`` function')
132 if not reDatetime:
133 reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T')
135 if not replace:
136 insertVerb = "INSERT"
137 else:
138 insertVerb = "INSERT IGNORE"
140 if returnInsertOnly == False:
141 # TEST THE ARGUMENTS
142 if str(type(dbConn).__name__) != "Connection":
143 message = 'Please use a valid MySQL DB connection.'
144 log.critical(message)
145 raise TypeError(message)
147 if not isinstance(dictionary, dict):
148 message = 'Please make sure "dictionary" argument is a dict type.'
149 log.critical(message)
150 raise TypeError(message)
152 if not isinstance(uniqueKeyList, list):
153 message = 'Please make sure "uniqueKeyList" is a list'
154 log.critical(message)
155 raise TypeError(message)
157 for i in uniqueKeyList:
158 if i not in list(dictionary.keys()):
159 message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert'
160 log.critical(message)
161 raise ValueError(message)
163 for k, v in list(dictionary.items()):
164 # log.debug('k: %s, v: %s' % (k, v,))
165 if isinstance(v, list) and len(v) != 2:
166 message = 'Please make sure the list values in "dictionary" 2 items in length'
167 log.critical("%s: in %s we have a %s (%s)" %
168 (message, k, v, type(v)))
169 raise ValueError(message)
170 if isinstance(v, list):
171 if not (isinstance(v[0], six.string_types) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], int) or isinstance(v[0], datetime.date) or v[0] == None):
172 message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool'
173 log.critical("%s: in %s we have a %s (%s)" %
174 (message, k, v, type(v)))
175 raise ValueError(message)
176 else:
177 if not (isinstance(v, six.string_types) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, datetime.date) or v == None or "int" in str(type(v))):
178 this = type(v)
179 message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals(
180 )
181 log.critical("%s: in %s we have a %s (%s)" %
182 (message, k, v, type(v)))
183 raise ValueError(message)
185 if not isinstance(createHelperTables, bool):
186 message = 'Please make sure "createHelperTables" is a True or False'
187 log.critical(message)
188 raise TypeError(message)
190 # TEST IF TABLE EXISTS
191 if not skipChecks:
192 tableExists = table_exists.table_exists(
193 dbConn=dbConn,
194 log=log,
195 dbTableName=dbTableName
196 )
197 else:
198 tableExists = False
200 # CREATE THE TABLE IF IT DOES NOT EXIST
201 if tableExists is False:
202 sqlQuery = """
203 CREATE TABLE IF NOT EXISTS `%(dbTableName)s`
204 (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter',
205 `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
206 `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP,
207 `updated` tinyint(4) DEFAULT '0',
208 PRIMARY KEY (`primaryId`))
209 ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1;
210 """ % locals()
211 writequery(
212 log=log,
213 sqlQuery=sqlQuery,
214 dbConn=dbConn,
216 )
218 qCreateColumn = ''
219 formattedKey = ''
220 formattedKeyList = []
221 myValues = []
223 # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this?
224 if dateModified:
225 dictionary['dateLastModified'] = [
226 str(times.get_now_sql_datetime()), "date row was modified"]
227 if replace == False:
228 dictionary['updated'] = [0, "this row has been updated"]
229 else:
230 dictionary['updated'] = [1, "this row has been updated"]
232 # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE
233 # NAME OF THE KEY, IF IT DOES NOT EXIST
234 count = len(dictionary)
235 i = 1
236 for (key, value) in list(dictionary.items()):
237 if (isinstance(value, list) and value[0] is None):
238 del dictionary[key]
239 # SORT THE DICTIONARY BY KEY
240 odictionary = c.OrderedDict(sorted(dictionary.items()))
241 for (key, value) in list(odictionary.items()):
243 formattedKey = key.replace(" ", "_").replace("-", "_")
244 # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST
245 if formattedKey == u"dec":
246 formattedKey = u"decl"
247 if formattedKey == u"DEC":
248 formattedKey = u"DECL"
250 formattedKeyList.extend([formattedKey])
251 if len(key) > 0:
252 # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A
253 # STRING TO MYSQL)
254 if isinstance(value, list) and (isinstance(value[0], list)):
255 value[0] = yaml.dump(value[0])
256 value[0] = str(value[0])
257 # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL
258 # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL
259 # COMMAND LATER
260 if isinstance(value, str):
261 value = value.replace('\\', '\\\\')
262 value = value.replace('"', '\\"')
263 try:
264 udata = value.decode("utf-8", "ignore")
265 value = udata.encode("ascii", "ignore")
266 except:
267 pass
269 # log.debug('udata: %(udata)s' % locals())
271 if isinstance(value, list) and isinstance(value[0], str):
272 myValues.extend(['%s' % value[0].strip()])
273 elif isinstance(value, list):
274 myValues.extend(['%s' % (value[0], )])
275 else:
276 myValues.extend(['%s' % (value, )])
278 if returnInsertOnly == False:
279 # CHECK IF COLUMN EXISTS YET
280 colExists = \
281 "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \
282 formattedKey + "'AND TABLE_NAME='" + dbTableName + """'"""
283 try:
284 # log.debug('checking if the column '+formattedKey+' exists
285 # in the '+dbTableName+' table')
287 rows = readquery(
288 log=log,
289 sqlQuery=colExists,
290 dbConn=dbConn,
291 )
292 except Exception as e:
293 log.error('something went wrong' + str(e) + '\n')
295 # IF COLUMN DOESN'T EXIT - GENERATE IT
296 if len(rows) == 0:
297 qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % (
298 dbTableName, formattedKey)
299 if not isinstance(value, list):
300 value = [value]
301 if reDatetime.search(str(value[0])):
302 # log.debug('Ok - a datetime string was found')
303 qCreateColumn += '` datetime DEFAULT NULL'
304 elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \
305 == 'feedName' or formattedKey == 'title':
306 qCreateColumn += '` varchar(100) DEFAULT NULL'
307 elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) < 30:
308 qCreateColumn += '` varchar(100) DEFAULT NULL'
309 elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) >= 30 and len(value[0]) < 80:
310 qCreateColumn += '` varchar(100) DEFAULT NULL'
311 elif isinstance(value[0], ("".__class__, u"".__class__)):
312 columnLength = 450 + len(value[0]) * 2
313 qCreateColumn += '` varchar(' + str(
314 columnLength) + ') DEFAULT NULL'
315 elif isinstance(value[0], int) and not isinstance(value[0], bool) and abs(value[0]) <= 9:
316 qCreateColumn += '` tinyint DEFAULT NULL'
317 elif isinstance(value[0], int) and not isinstance(value[0], bool):
318 qCreateColumn += '` int DEFAULT NULL'
319 elif (isinstance(value[0], float) or isinstance(value[0], int)) and not isinstance(value[0], bool):
320 qCreateColumn += '` double DEFAULT NULL'
321 elif isinstance(value[0], bool):
322 qCreateColumn += '` tinyint DEFAULT NULL'
323 elif isinstance(value[0], list):
324 qCreateColumn += '` varchar(1024) DEFAULT NULL'
325 else:
326 # log.debug('Do not know what format to add this key in
327 # MySQL - removing from dictionary: %s, %s'
328 # % (key, type(value[0])))
329 formattedKeyList.pop()
330 myValues.pop()
331 qCreateColumn = None
332 if qCreateColumn:
333 # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR
334 # MYSQL
335 if key is not formattedKey:
336 qCreateColumn += " COMMENT 'original keyword: " + \
337 key + """'"""
338 # CREATE THE COLUMN IF IT DOES NOT EXIST
339 try:
340 log.info('creating the ' +
341 formattedKey + ' column in the ' + dbTableName + ' table')
342 writequery(
343 log=log,
344 sqlQuery=qCreateColumn,
345 dbConn=dbConn
346 )
348 except Exception as e:
349 # log.debug('qCreateColumn: %s' % (qCreateColumn,
350 # ))
351 log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName
352 + ' table -- ' + str(e) + '\n')
354 if returnInsertOnly == False:
355 # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST
356 if len(uniqueKeyList):
357 for i in range(len(uniqueKeyList)):
358 uniqueKeyList[i] = uniqueKeyList[
359 i].replace(" ", "_").replace("-", "_")
360 if uniqueKeyList[i] == u"dec":
361 uniqueKeyList[i] = u"decl"
362 if uniqueKeyList[i] == u"DEC":
363 uniqueKeyList[i] = u"DECL"
365 indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_")
366 for i in range(len(uniqueKeyList) - 1):
367 indexName += '_' + uniqueKeyList[i + 1]
369 indexName = indexName.lower().replace(" ", " ").replace(" ", "_")
371 sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \
372 dbTableName + """' AND INDEX_NAME = '""" + indexName + """'"""
373 rows = readquery(
374 log=log,
375 sqlQuery=sqlQuery,
376 dbConn=dbConn,
377 quiet=False
378 )
380 exists = rows[0]['COUNT(*)']
381 # log.debug('uniqueKeyList: %s' % (uniqueKeyList,))
382 if exists == 0:
383 if isinstance(uniqueKeyList, list):
384 uniqueKeyList = ','.join(uniqueKeyList)
386 addUniqueKey = 'ALTER TABLE `' + dbTableName + \
387 '` ADD unique ' + indexName + \
388 """ (""" + uniqueKeyList + ')'
389 # log.debug('HERE IS THE COMMAND:'+addUniqueKey)
390 writequery(
391 log=log,
392 sqlQuery=addUniqueKey,
393 dbConn=dbConn
394 )
396 if returnInsertOnly == True and batchInserts == True:
397 myKeys = '`,`'.join(formattedKeyList)
398 valueString = ("%s, " * len(myValues))[:-2]
399 insertCommand = insertVerb + """ INTO `""" + dbTableName + \
400 """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \
401 valueString + """, NOW())"""
402 mv = []
403 mv[:] = [None if m == u"None" else m for m in myValues]
404 valueTuple = tuple(mv)
406 dup = ""
407 if replace:
408 dup = " ON DUPLICATE KEY UPDATE "
409 for k, v in zip(formattedKeyList, mv):
410 dup = """%(dup)s %(k)s=values(%(k)s),""" % locals()
412 insertCommand = insertCommand + dup
414 insertCommand = insertCommand.replace('\\""', '\\" "')
415 insertCommand = insertCommand.replace('""', "null")
416 insertCommand = insertCommand.replace('!!python/unicode:', '')
417 insertCommand = insertCommand.replace('!!python/unicode', '')
418 insertCommand = insertCommand.replace('"None"', 'null')
419 insertCommand = insertCommand.replace('"null"', 'null')
421 if not dateCreated:
422 insertCommand = insertCommand.replace(
423 ", dateCreated)", ")").replace(", NOW())", ")")
425 return insertCommand, valueTuple
427 # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES
428 myKeys = '`,`'.join(formattedKeyList)
429 myValues = '" ,"'.join(myValues)
430 # log.debug(myValues+" ------ PRESTRIP")
431 # REMOVE SOME CONVERSION NOISE
432 myValues = myValues.replace('time.struct_time', '')
433 myValues = myValues.replace(
434 '- !!python/object/new:feedparser.FeedParserDict', '')
435 myValues = myValues.replace(
436 '!!python/object/new:feedparser.FeedParserDict', '')
437 myValues = myValues.replace('dictitems:', '')
438 myValues = myValues.replace('dictitems', '')
439 myValues = myValues.replace('!!python/unicode:', '')
440 myValues = myValues.replace('!!python/unicode', '')
441 myValues = myValues.replace('"None"', 'null')
442 myValues = myValues.replace('"null"', 'null')
443 # myValues = myValues.replace('"None', 'null')
445 if myValues[-4:] != 'null':
446 myValues += '"'
448 dup = ""
449 if replace:
450 dupValues = ('"' + myValues).split(" ,")
451 dupKeys = formattedKeyList
452 dup = dup + " ON DUPLICATE KEY UPDATE "
453 for k, v in zip(dupKeys, dupValues):
454 dup = """%(dup)s `%(k)s`=%(v)s,""" % locals()
456 if dateModified:
457 dup = """%(dup)s updated=IF(""" % locals()
458 for k, v in zip(dupKeys, dupValues):
459 if v == "null":
460 dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
461 else:
462 dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
463 dup = dup[:-5] + ", 0, 1), dateLastModified=IF("
464 for k, v in zip(dupKeys, dupValues):
465 if v == "null":
466 dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals()
467 else:
468 dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals()
469 dup = dup[:-5] + ", dateLastModified, NOW())"
470 else:
471 dup = dup[:-1]
473 # log.debug(myValues+" ------ POSTSTRIP")
474 addValue = insertVerb + """ INTO `""" + dbTableName + \
475 """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \
476 myValues + """, NOW()) %(dup)s """ % locals()
478 if not dateCreated:
479 addValue = addValue.replace(
480 ", dateCreated)", ")").replace(", NOW())", ")", 1)
482 addValue = addValue.replace('\\""', '\\" "')
483 addValue = addValue.replace('""', "null")
484 addValue = addValue.replace('!!python/unicode:', '')
485 addValue = addValue.replace('!!python/unicode', '')
486 addValue = addValue.replace('"None"', 'null')
487 addValue = addValue.replace('"null"', 'null')
488 # log.debug(addValue)
490 if returnInsertOnly == True:
491 return addValue
493 message = ""
494 try:
495 # log.debug('adding new data to the %s table; query: %s' %
496 # (dbTableName, addValue))"
497 writequery(
498 log=log,
499 sqlQuery=addValue,
500 dbConn=dbConn
501 )
503 except Exception as e:
504 message = "could not add new data added to the table '" + \
505 dbTableName + "' : " + str(e) + '\n'
506 log.error(message)
507 print(message)
508 raise Exception
510 log.debug('completed the ``convert_dictionary_to_mysql_table`` function')
511 return None, None