Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Convert a python dictionary into rows of a mysql table* 

5 

6:Author: 

7 David Young 

8""" 

9from builtins import zip 

10from builtins import str 

11from builtins import range 

12import sys 

13import os 

14os.environ['TERM'] = 'vt100' 

15import re 

16import yaml 

17import time 

18import datetime 

19import collections as c 

20from fundamentals import tools, times 

21from fundamentals.mysql import writequery, table_exists, readquery 

22import six 

23 

24 

25def convert_dictionary_to_mysql_table( 

26 log, 

27 dictionary, 

28 dbTableName, 

29 uniqueKeyList=[], 

30 dbConn=False, 

31 createHelperTables=False, 

32 dateModified=False, 

33 returnInsertOnly=False, 

34 replace=False, 

35 batchInserts=True, 

36 reDatetime=False, 

37 skipChecks=False, 

38 dateCreated=True): 

39 """convert dictionary to mysql table 

40 

41 **Key Arguments** 

42 

43 - ``log`` -- logger 

44 - ``dictionary`` -- python dictionary 

45 - ``dbConn`` -- the db connection 

46 - ``dbTableName`` -- name of the table you wish to add the data to (or create if it does not exist) 

47 - ``uniqueKeyList`` - a lists column names that need combined to create the primary key 

48 - ``createHelperTables`` -- create some helper tables with the main table, detailing original keywords etc 

49 - ``returnInsertOnly`` -- returns only the insert command (does not execute it) 

50 - ``dateModified`` -- add a modification date and updated flag to the mysql table 

51 - ``replace`` -- use replace instead of mysql insert statements (useful when updates are required) 

52 - ``batchInserts`` -- if returning insert statements return separate insert commands and value tuples 

53 

54 

55 - ``reDatetime`` -- compiled regular expression matching datetime (passing this in cuts down on execution time as it doesn't have to be recompiled everytime during multiple iterations of ``convert_dictionary_to_mysql_table``) 

56 - ``skipChecks`` -- skip reliability checks. Less robust but a little faster. 

57 - ``dateCreated`` -- add a timestamp for dateCreated? 

58 

59 **Return** 

60 

61 - ``returnInsertOnly`` -- the insert statement if requested 

62 

63 

64 **Usage** 

65 

66 To add a python dictionary to a database table, creating the table and/or columns if they don't yet exist: 

67 

68 ```python 

69 from fundamentals.mysql import convert_dictionary_to_mysql_table 

70 dictionary = {"a newKey": "cool", "and another": "super cool", 

71 "uniquekey1": "cheese", "uniqueKey2": "burgers"} 

72 

73 convert_dictionary_to_mysql_table( 

74 dbConn=dbConn, 

75 log=log, 

76 dictionary=dictionary, 

77 dbTableName="testing_table", 

78 uniqueKeyList=["uniquekey1", "uniqueKey2"], 

79 dateModified=False, 

80 returnInsertOnly=False, 

81 replace=True 

82 ) 

83 ``` 

84 

85 Or just return the insert statement with a list of value tuples, i.e. do not execute the command on the database: 

86 

87 insertCommand, valueTuple = convert_dictionary_to_mysql_table( 

88 dbConn=dbConn, 

89 log=log, 

90 dictionary=dictionary, 

91 dbTableName="testing_table", 

92 uniqueKeyList=["uniquekey1", "uniqueKey2"], 

93 dateModified=False, 

94 returnInsertOnly=True, 

95 replace=False, 

96 batchInserts=True 

97 ) 

98 

99 print(insertCommand, valueTuple) 

100 

101 # OUT: 'INSERT IGNORE INTO `testing_table` 

102 # (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) VALUES 

103 # (%s, %s, %s, %s, %s)', ('cool', 'super cool', 

104 # '2016-06-21T12:08:59', 'burgers', 'cheese') 

105 

106 You can also return a list of single insert statements using ``batchInserts = False``. Using ``replace = True`` will also add instructions about how to replace duplicate entries in the database table if found: 

107 

108 inserts = convert_dictionary_to_mysql_table( 

109 dbConn=dbConn, 

110 log=log, 

111 dictionary=dictionary, 

112 dbTableName="testing_table", 

113 uniqueKeyList=["uniquekey1", "uniqueKey2"], 

114 dateModified=False, 

115 returnInsertOnly=True, 

116 replace=True, 

117 batchInserts=False 

118 ) 

119 

120 print(inserts) 

121 

122 # OUT: INSERT INTO `testing_table` (a_newKey,and_another,dateCreated,uniqueKey2,uniquekey1) 

123 # VALUES ("cool" ,"super cool" ,"2016-09-14T13:12:08" ,"burgers" ,"cheese") 

124 # ON DUPLICATE KEY UPDATE a_newKey="cool", and_another="super 

125 # cool", dateCreated="2016-09-14T13:12:08", uniqueKey2="burgers", 

126 # uniquekey1="cheese" 

127 

128 """ 

129 

130 log.debug('starting the ``convert_dictionary_to_mysql_table`` function') 

131 

132 if not reDatetime: 

133 reDatetime = re.compile('^[0-9]{4}-[0-9]{2}-[0-9]{2}T') 

134 

135 if not replace: 

136 insertVerb = "INSERT" 

137 else: 

138 insertVerb = "INSERT IGNORE" 

139 

140 if returnInsertOnly == False: 

141 # TEST THE ARGUMENTS 

142 if str(type(dbConn).__name__) != "Connection": 

143 message = 'Please use a valid MySQL DB connection.' 

144 log.critical(message) 

145 raise TypeError(message) 

146 

147 if not isinstance(dictionary, dict): 

148 message = 'Please make sure "dictionary" argument is a dict type.' 

149 log.critical(message) 

150 raise TypeError(message) 

151 

152 if not isinstance(uniqueKeyList, list): 

153 message = 'Please make sure "uniqueKeyList" is a list' 

154 log.critical(message) 

155 raise TypeError(message) 

156 

157 for i in uniqueKeyList: 

158 if i not in list(dictionary.keys()): 

159 message = 'Please make sure values in "uniqueKeyList" are present in the "dictionary" you are tring to convert' 

160 log.critical(message) 

161 raise ValueError(message) 

162 

163 for k, v in list(dictionary.items()): 

164 # log.debug('k: %s, v: %s' % (k, v,)) 

165 if isinstance(v, list) and len(v) != 2: 

166 message = 'Please make sure the list values in "dictionary" 2 items in length' 

167 log.critical("%s: in %s we have a %s (%s)" % 

168 (message, k, v, type(v))) 

169 raise ValueError(message) 

170 if isinstance(v, list): 

171 if not (isinstance(v[0], six.string_types) or isinstance(v[0], int) or isinstance(v[0], bool) or isinstance(v[0], float) or isinstance(v[0], int) or isinstance(v[0], datetime.date) or v[0] == None): 

172 message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool' 

173 log.critical("%s: in %s we have a %s (%s)" % 

174 (message, k, v, type(v))) 

175 raise ValueError(message) 

176 else: 

177 if not (isinstance(v, six.string_types) or isinstance(v, int) or isinstance(v, bool) or isinstance(v, float) or isinstance(v, datetime.date) or v == None or "int" in str(type(v))): 

178 this = type(v) 

179 message = 'Please make sure values in "dictionary" are of an appropriate value to add to the database, must be str, float, int or bool : %(k)s is a %(this)s' % locals( 

180 ) 

181 log.critical("%s: in %s we have a %s (%s)" % 

182 (message, k, v, type(v))) 

183 raise ValueError(message) 

184 

185 if not isinstance(createHelperTables, bool): 

186 message = 'Please make sure "createHelperTables" is a True or False' 

187 log.critical(message) 

188 raise TypeError(message) 

189 

190 # TEST IF TABLE EXISTS 

191 if not skipChecks: 

192 tableExists = table_exists.table_exists( 

193 dbConn=dbConn, 

194 log=log, 

195 dbTableName=dbTableName 

196 ) 

197 else: 

198 tableExists = False 

199 

200 # CREATE THE TABLE IF IT DOES NOT EXIST 

201 if tableExists is False: 

202 sqlQuery = """ 

203 CREATE TABLE IF NOT EXISTS `%(dbTableName)s` 

204 (`primaryId` bigint(20) NOT NULL AUTO_INCREMENT COMMENT 'An internal counter', 

205 `dateCreated` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, 

206 `dateLastModified` DATETIME NULL DEFAULT CURRENT_TIMESTAMP, 

207 `updated` tinyint(4) DEFAULT '0', 

208 PRIMARY KEY (`primaryId`)) 

209 ENGINE=MyISAM AUTO_INCREMENT=0 DEFAULT CHARSET=latin1; 

210 """ % locals() 

211 writequery( 

212 log=log, 

213 sqlQuery=sqlQuery, 

214 dbConn=dbConn, 

215 

216 ) 

217 

218 qCreateColumn = '' 

219 formattedKey = '' 

220 formattedKeyList = [] 

221 myValues = [] 

222 

223 # ADD EXTRA COLUMNS TO THE DICTIONARY todo: do I need this? 

224 if dateModified: 

225 dictionary['dateLastModified'] = [ 

226 str(times.get_now_sql_datetime()), "date row was modified"] 

227 if replace == False: 

228 dictionary['updated'] = [0, "this row has been updated"] 

229 else: 

230 dictionary['updated'] = [1, "this row has been updated"] 

231 

232 # ITERATE THROUGH THE DICTIONARY AND GENERATE THE TABLE COLUMN WITH THE 

233 # NAME OF THE KEY, IF IT DOES NOT EXIST 

234 count = len(dictionary) 

235 i = 1 

236 for (key, value) in list(dictionary.items()): 

237 if (isinstance(value, list) and value[0] is None): 

238 del dictionary[key] 

239 # SORT THE DICTIONARY BY KEY 

240 odictionary = c.OrderedDict(sorted(dictionary.items())) 

241 for (key, value) in list(odictionary.items()): 

242 

243 formattedKey = key.replace(" ", "_").replace("-", "_") 

244 # DEC A KEYWORD IN MYSQL - NEED TO CHANGE BEFORE INGEST 

245 if formattedKey == u"dec": 

246 formattedKey = u"decl" 

247 if formattedKey == u"DEC": 

248 formattedKey = u"DECL" 

249 

250 formattedKeyList.extend([formattedKey]) 

251 if len(key) > 0: 

252 # CONVERT LIST AND FEEDPARSER VALUES TO YAML (SO I CAN PASS IT AS A 

253 # STRING TO MYSQL) 

254 if isinstance(value, list) and (isinstance(value[0], list)): 

255 value[0] = yaml.dump(value[0]) 

256 value[0] = str(value[0]) 

257 # REMOVE CHARACTERS THAT COLLIDE WITH MYSQL 

258 # JOIN THE VALUES TOGETHER IN A LIST - EASIER TO GENERATE THE MYSQL 

259 # COMMAND LATER 

260 if isinstance(value, str): 

261 value = value.replace('\\', '\\\\') 

262 value = value.replace('"', '\\"') 

263 try: 

264 udata = value.decode("utf-8", "ignore") 

265 value = udata.encode("ascii", "ignore") 

266 except: 

267 pass 

268 

269 # log.debug('udata: %(udata)s' % locals()) 

270 

271 if isinstance(value, list) and isinstance(value[0], str): 

272 myValues.extend(['%s' % value[0].strip()]) 

273 elif isinstance(value, list): 

274 myValues.extend(['%s' % (value[0], )]) 

275 else: 

276 myValues.extend(['%s' % (value, )]) 

277 

278 if returnInsertOnly == False: 

279 # CHECK IF COLUMN EXISTS YET 

280 colExists = \ 

281 "SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='" + \ 

282 formattedKey + "'AND TABLE_NAME='" + dbTableName + """'""" 

283 try: 

284 # log.debug('checking if the column '+formattedKey+' exists 

285 # in the '+dbTableName+' table') 

286 

287 rows = readquery( 

288 log=log, 

289 sqlQuery=colExists, 

290 dbConn=dbConn, 

291 ) 

292 except Exception as e: 

293 log.error('something went wrong' + str(e) + '\n') 

294 

295 # IF COLUMN DOESN'T EXIT - GENERATE IT 

296 if len(rows) == 0: 

297 qCreateColumn = """ALTER TABLE `%s` ADD `%s""" % ( 

298 dbTableName, formattedKey) 

299 if not isinstance(value, list): 

300 value = [value] 

301 if reDatetime.search(str(value[0])): 

302 # log.debug('Ok - a datetime string was found') 

303 qCreateColumn += '` datetime DEFAULT NULL' 

304 elif formattedKey == 'updated_parsed' or formattedKey == 'published_parsed' or formattedKey \ 

305 == 'feedName' or formattedKey == 'title': 

306 qCreateColumn += '` varchar(100) DEFAULT NULL' 

307 elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) < 30: 

308 qCreateColumn += '` varchar(100) DEFAULT NULL' 

309 elif isinstance(value[0], ("".__class__, u"".__class__)) and len(value[0]) >= 30 and len(value[0]) < 80: 

310 qCreateColumn += '` varchar(100) DEFAULT NULL' 

311 elif isinstance(value[0], ("".__class__, u"".__class__)): 

312 columnLength = 450 + len(value[0]) * 2 

313 qCreateColumn += '` varchar(' + str( 

314 columnLength) + ') DEFAULT NULL' 

315 elif isinstance(value[0], int) and not isinstance(value[0], bool) and abs(value[0]) <= 9: 

316 qCreateColumn += '` tinyint DEFAULT NULL' 

317 elif isinstance(value[0], int) and not isinstance(value[0], bool): 

318 qCreateColumn += '` int DEFAULT NULL' 

319 elif (isinstance(value[0], float) or isinstance(value[0], int)) and not isinstance(value[0], bool): 

320 qCreateColumn += '` double DEFAULT NULL' 

321 elif isinstance(value[0], bool): 

322 qCreateColumn += '` tinyint DEFAULT NULL' 

323 elif isinstance(value[0], list): 

324 qCreateColumn += '` varchar(1024) DEFAULT NULL' 

325 else: 

326 # log.debug('Do not know what format to add this key in 

327 # MySQL - removing from dictionary: %s, %s' 

328 # % (key, type(value[0]))) 

329 formattedKeyList.pop() 

330 myValues.pop() 

331 qCreateColumn = None 

332 if qCreateColumn: 

333 # ADD COMMENT TO GIVE THE ORGINAL KEYWORD IF formatted FOR 

334 # MYSQL 

335 if key is not formattedKey: 

336 qCreateColumn += " COMMENT 'original keyword: " + \ 

337 key + """'""" 

338 # CREATE THE COLUMN IF IT DOES NOT EXIST 

339 try: 

340 log.info('creating the ' + 

341 formattedKey + ' column in the ' + dbTableName + ' table') 

342 writequery( 

343 log=log, 

344 sqlQuery=qCreateColumn, 

345 dbConn=dbConn 

346 ) 

347 

348 except Exception as e: 

349 # log.debug('qCreateColumn: %s' % (qCreateColumn, 

350 # )) 

351 log.error('could not create the ' + formattedKey + ' column in the ' + dbTableName 

352 + ' table -- ' + str(e) + '\n') 

353 

354 if returnInsertOnly == False: 

355 # GENERATE THE INDEX NAME - THEN CREATE INDEX IF IT DOES NOT YET EXIST 

356 if len(uniqueKeyList): 

357 for i in range(len(uniqueKeyList)): 

358 uniqueKeyList[i] = uniqueKeyList[ 

359 i].replace(" ", "_").replace("-", "_") 

360 if uniqueKeyList[i] == u"dec": 

361 uniqueKeyList[i] = u"decl" 

362 if uniqueKeyList[i] == u"DEC": 

363 uniqueKeyList[i] = u"DECL" 

364 

365 indexName = uniqueKeyList[0].replace(" ", "_").replace("-", "_") 

366 for i in range(len(uniqueKeyList) - 1): 

367 indexName += '_' + uniqueKeyList[i + 1] 

368 

369 indexName = indexName.lower().replace(" ", " ").replace(" ", "_") 

370 

371 sqlQuery = u"""SELECT COUNT(*) FROM INFORMATION_SCHEMA.STATISTICS WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = '""" + \ 

372 dbTableName + """' AND INDEX_NAME = '""" + indexName + """'""" 

373 rows = readquery( 

374 log=log, 

375 sqlQuery=sqlQuery, 

376 dbConn=dbConn, 

377 quiet=False 

378 ) 

379 

380 exists = rows[0]['COUNT(*)'] 

381 # log.debug('uniqueKeyList: %s' % (uniqueKeyList,)) 

382 if exists == 0: 

383 if isinstance(uniqueKeyList, list): 

384 uniqueKeyList = ','.join(uniqueKeyList) 

385 

386 addUniqueKey = 'ALTER TABLE `' + dbTableName + \ 

387 '` ADD unique ' + indexName + \ 

388 """ (""" + uniqueKeyList + ')' 

389 # log.debug('HERE IS THE COMMAND:'+addUniqueKey) 

390 writequery( 

391 log=log, 

392 sqlQuery=addUniqueKey, 

393 dbConn=dbConn 

394 ) 

395 

396 if returnInsertOnly == True and batchInserts == True: 

397 myKeys = '`,`'.join(formattedKeyList) 

398 valueString = ("%s, " * len(myValues))[:-2] 

399 insertCommand = insertVerb + """ INTO `""" + dbTableName + \ 

400 """` (`""" + myKeys + """`, dateCreated) VALUES (""" + \ 

401 valueString + """, NOW())""" 

402 mv = [] 

403 mv[:] = [None if m == u"None" else m for m in myValues] 

404 valueTuple = tuple(mv) 

405 

406 dup = "" 

407 if replace: 

408 dup = " ON DUPLICATE KEY UPDATE " 

409 for k, v in zip(formattedKeyList, mv): 

410 dup = """%(dup)s %(k)s=values(%(k)s),""" % locals() 

411 

412 insertCommand = insertCommand + dup 

413 

414 insertCommand = insertCommand.replace('\\""', '\\" "') 

415 insertCommand = insertCommand.replace('""', "null") 

416 insertCommand = insertCommand.replace('!!python/unicode:', '') 

417 insertCommand = insertCommand.replace('!!python/unicode', '') 

418 insertCommand = insertCommand.replace('"None"', 'null') 

419 insertCommand = insertCommand.replace('"null"', 'null') 

420 

421 if not dateCreated: 

422 insertCommand = insertCommand.replace( 

423 ", dateCreated)", ")").replace(", NOW())", ")") 

424 

425 return insertCommand, valueTuple 

426 

427 # GENERATE THE INSERT COMMAND - IGNORE DUPLICATE ENTRIES 

428 myKeys = '`,`'.join(formattedKeyList) 

429 myValues = '" ,"'.join(myValues) 

430 # log.debug(myValues+" ------ PRESTRIP") 

431 # REMOVE SOME CONVERSION NOISE 

432 myValues = myValues.replace('time.struct_time', '') 

433 myValues = myValues.replace( 

434 '- !!python/object/new:feedparser.FeedParserDict', '') 

435 myValues = myValues.replace( 

436 '!!python/object/new:feedparser.FeedParserDict', '') 

437 myValues = myValues.replace('dictitems:', '') 

438 myValues = myValues.replace('dictitems', '') 

439 myValues = myValues.replace('!!python/unicode:', '') 

440 myValues = myValues.replace('!!python/unicode', '') 

441 myValues = myValues.replace('"None"', 'null') 

442 myValues = myValues.replace('"null"', 'null') 

443 # myValues = myValues.replace('"None', 'null') 

444 

445 if myValues[-4:] != 'null': 

446 myValues += '"' 

447 

448 dup = "" 

449 if replace: 

450 dupValues = ('"' + myValues).split(" ,") 

451 dupKeys = formattedKeyList 

452 dup = dup + " ON DUPLICATE KEY UPDATE " 

453 for k, v in zip(dupKeys, dupValues): 

454 dup = """%(dup)s `%(k)s`=%(v)s,""" % locals() 

455 

456 if dateModified: 

457 dup = """%(dup)s updated=IF(""" % locals() 

458 for k, v in zip(dupKeys, dupValues): 

459 if v == "null": 

460 dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() 

461 else: 

462 dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() 

463 dup = dup[:-5] + ", 0, 1), dateLastModified=IF(" 

464 for k, v in zip(dupKeys, dupValues): 

465 if v == "null": 

466 dup = """%(dup)s `%(k)s` is %(v)s AND """ % locals() 

467 else: 

468 dup = """%(dup)s `%(k)s`=%(v)s AND """ % locals() 

469 dup = dup[:-5] + ", dateLastModified, NOW())" 

470 else: 

471 dup = dup[:-1] 

472 

473 # log.debug(myValues+" ------ POSTSTRIP") 

474 addValue = insertVerb + """ INTO `""" + dbTableName + \ 

475 """` (`""" + myKeys + """`, dateCreated) VALUES (\"""" + \ 

476 myValues + """, NOW()) %(dup)s """ % locals() 

477 

478 if not dateCreated: 

479 addValue = addValue.replace( 

480 ", dateCreated)", ")").replace(", NOW())", ")", 1) 

481 

482 addValue = addValue.replace('\\""', '\\" "') 

483 addValue = addValue.replace('""', "null") 

484 addValue = addValue.replace('!!python/unicode:', '') 

485 addValue = addValue.replace('!!python/unicode', '') 

486 addValue = addValue.replace('"None"', 'null') 

487 addValue = addValue.replace('"null"', 'null') 

488 # log.debug(addValue) 

489 

490 if returnInsertOnly == True: 

491 return addValue 

492 

493 message = "" 

494 try: 

495 # log.debug('adding new data to the %s table; query: %s' % 

496 # (dbTableName, addValue))" 

497 writequery( 

498 log=log, 

499 sqlQuery=addValue, 

500 dbConn=dbConn 

501 ) 

502 

503 except Exception as e: 

504 message = "could not add new data added to the table '" + \ 

505 dbTableName + "' : " + str(e) + '\n' 

506 log.error(message) 

507 print(message) 

508 raise Exception 

509 

510 log.debug('completed the ``convert_dictionary_to_mysql_table`` function') 

511 return None, None