Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Take key-values from a yaml file including a tablename(s) and add them to a mysql database table* 

5 

6Usage: 

7 yaml2db [-d] -s <pathToSettingsFile> <pathToYaml> 

8 yaml2db [-d] --host=<host> --user=<user> --passwd=<passwd> --dbName=<dbName> <pathToYaml> 

9 

10Options: 

11 

12 pathToYaml path to a single yaml file or directory of yaml files 

13 pathToSettingsFile path to a settings file with logging and database information (yaml file) 

14 --host=<host> the database host 

15 --user=<user> database user 

16 --passwd=<passwd> database user password 

17 --dbName=<dbName> name of the database to add the table content to 

18 

19 -d, --delete delete yaml file(s) once added to database 

20 -h, --help show this help message 

21 -v, --version show version 

22 -s, --settings the settings file 

23""" 

24from __future__ import print_function 

25 

26from builtins import object 

27import sys 

28import os 

29import yaml 

30 

31import re 

32import glob 

33import docopt 

34from fundamentals import tools, times 

35from fundamentals.mysql import convert_dictionary_to_mysql_table 

36 

37 

def main(arguments=None):
    """
    *The main function used when ``yaml_to_database.py`` is run as a script or installed as a command-line tool*

    **Key Arguments**

    - ``arguments`` -- handed unchanged to the ``fundamentals`` ``tools`` helper, which parses the command-line against the module docstring. Default *None*

    **Return**

    - None
    """

    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG EVERY PARSED ARGUMENT UNDER THE SAME VARIABLE-STYLE NAME AS BEFORE AND
    # HONOUR AN EXPLICIT `--dbConn` OVERRIDE. THE VALUES ARE NO LONGER INJECTED
    # WITH `exec`: ON PYTHON 3 `exec` CANNOT CREATE FUNCTION LOCALS (SO THE NAMES
    # USED BELOW WERE UNDEFINED) AND A QUOTE IN A VALUE BROKE THE GENERATED CODE
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        if arg == "--dbConn":
            dbConn = val
        log.debug('%s = %s' % (varname, val,))

    # THE ONLY TWO ARGUMENTS THIS FUNCTION ACTUALLY CONSUMES
    pathToYaml = arguments["<pathToYaml>"]
    deleteFlag = arguments.get("--delete", False)

    # IMPORTED LOCALLY, AS IN THE ORIGINAL CODE (NOW ONCE INSTEAD OF PER BRANCH)
    from fundamentals.mysql import yaml_to_database

    if os.path.isfile(pathToYaml):
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn
        )
        yaml2db.add_yaml_file_content_to_database(
            filepath=pathToYaml,
            deleteFile=deleteFlag
        )
        basename = os.path.basename(pathToYaml)
        print("Content of %s added to database" % (basename,))

    else:
        # ANYTHING THAT IS NOT A REGULAR FILE IS TREATED AS A DIRECTORY OF YAML
        # FILES
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir=pathToYaml,
            deleteFiles=deleteFlag
        )
        yaml2db.ingest()
        print("Content of %s directory added to database" % (pathToYaml,))

    return

96 

97 

class yaml_to_database(object):
    """
    *Take key-values from yaml files including a tablename(s) and add them to a mysql database table*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary
    - ``pathToInputDir`` -- path to the directory containing the yaml files that will be added to the database table(s). Default *False*
    - ``dbConn`` -- connection to database to add the content to
    - ``deleteFiles`` -- delete the yaml files once their content has been added to the database. Default *False*


    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a ``yaml2db`` object, use the following:

    ```python
    from fundamentals.mysql import yaml_to_database
    yaml2db = yaml_to_database(
        log=log,
        settings=settings,
        dbConn=dbConn,
        pathToInputDir="/path/to/yaml/directory",
        deleteFiles=False
    )
    ```

    And here's an example of the content in a yaml file that this ``yaml2db`` object can parse:

    ```yaml
    title: Why you should do most of your text editing in : Sublime Text | Sublime Text Tips
    url: http://sublimetexttips.com/why-you-should-do-most-of-your-text-editing-in-sublime-text/?utm_source=drip&utm_medium=email&utm_campaign=editor-proliferation
    kind: webpage
    subtype: article
    table: web_articles,podcasts
    ```

    """
    # Initialisation

    def __init__(
            self,
            log,
            dbConn,
            pathToInputDir=False,
            settings=False,
            deleteFiles=False

    ):
        self.log = log
        log.debug("instansiating a new 'yaml_to_database' object")
        self.settings = settings
        self.pathToInputDir = pathToInputDir
        self.dbConn = dbConn
        self.deleteFiles = deleteFiles
        # xt-self-arg-tmpx

        return None

    def ingest(self):
        """
        *ingest the contents of the directory of yaml files into a database*

        **Return**

        - None


        **Usage**

        To import an entire directory of yaml files into a database, use the following:


        ```python
        from fundamentals.mysql import yaml_to_database
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir="/path/to/yaml/directory",
            deleteFiles=False
        )
        yaml2db.ingest()
        ```
        """
        self.log.debug('starting the ``ingest`` method')

        for d in os.listdir(self.pathToInputDir):
            candidate = os.path.join(self.pathToInputDir, d)
            # NOTE: THIS IS A SUBSTRING TEST ON THE FILE NAME, NOT AN EXTENSION
            # TEST -- ANY REGULAR FILE WHOSE NAME CONTAINS "yaml" IS INGESTED
            if os.path.isfile(candidate) and "yaml" in d.lower():
                self.add_yaml_file_content_to_database(
                    filepath=candidate,
                    deleteFile=self.deleteFiles
                )

        self.log.debug('completed the ``ingest`` method')
        return None

    def add_yaml_file_content_to_database(
            self,
            filepath,
            deleteFile=False
    ):
        """*given a path to a yaml file, add yaml file content to database*

        The file is not parsed with a yaml library; ``key: value`` pairs are
        pulled out with a regular expression and values wrapped in a matching
        pair of single or double quotes have the quotes removed. If the content
        has no ``table`` key a warning is logged and nothing is written.

        **Key Arguments**

        - ``filepath`` -- the path to the yaml file
        - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*


        **Return**

        - None


        **Raises**

        - ``IOError`` -- if the file cannot be opened or read


        **Usage**

        To parse and import the contents of a single yaml file into the database, use the following:

        ```python
        from fundamentals.mysql import yaml_to_database
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn
        )
        yaml2db.add_yaml_file_content_to_database(
            filepath="/path/to/file.yaml",
            deleteFile=True
        )
        ```

        """
        self.log.debug(
            'starting the ``add_yaml_file_content_to_database`` method')

        import codecs

        try:
            self.log.debug("attempting to open the file %s" % (filepath,))
            # `with` GUARANTEES THE HANDLE IS CLOSED EVEN IF THE READ FAILS
            with codecs.open(filepath, encoding='utf-8', mode='r') as readFile:
                thisData = readFile.read()
        except IOError:
            message = 'could not open the file %s' % (filepath,)
            self.log.critical(message)
            raise IOError(message)

        matchObject = re.finditer(
            r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
            thisData,
            flags=re.M | re.S  # re.S
        )

        yamlContent = {}
        for match in matchObject:
            v = match.group("value")
            # STRIP ONE PAIR OF MATCHING SURROUNDING QUOTES. THE LENGTH GUARD
            # STOPS AN EMPTY VALUE (`key: ` FOLLOWED BY A NEWLINE) FROM RAISING
            # AN IndexError
            if len(v) >= 2 and v[0] == v[-1] and v[0] in ('"', "'"):
                v = v[1:-1]
            yamlContent[match.group("key")] = v

        if "table" not in yamlContent:
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s' % {"filepath": filepath})
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE
        dbTables = [d.strip() for d in yamlContent["table"].split(",")]
        del yamlContent["table"]

        # UNSHORTEN URL -- BEST EFFORT ONLY: IF THE LOOKUP FAILS THE URL IS KEPT
        # AS WRITTEN IN THE FILE
        if "url" in yamlContent:
            # IMPORTED HERE SO FILES WITHOUT A URL DO NOT NEED `requests`
            import requests
            import requests.packages.urllib3
            requests.packages.urllib3.disable_warnings()
            try:
                r = requests.head(yamlContent["url"], allow_redirects=True)
                yamlContent["url"] = r.url
            except Exception as e:
                # `Exception` RATHER THAN A BARE `except` SO KeyboardInterrupt
                # AND SystemExit STILL PROPAGATE
                self.log.debug(
                    "could not resolve the url %s: %s" % (yamlContent["url"], e))

        yamlContent["original_yaml_path"] = filepath

        if "url" in yamlContent:
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        # THE SAME DICTIONARY IS HANDED TO THE HELPER ONCE PER TABLE NAME
        for t in dbTables:
            convert_dictionary_to_mysql_table(
                dbConn=self.dbConn,
                log=self.log,
                dictionary=yamlContent,
                dbTableName=t,
                uniqueKeyList=uniqueKeyList,
                dateModified=True,
                returnInsertOnly=False,
                replace=True
            )

        # REMOVE THE FILE ONLY AFTER EVERY TABLE HAS BEEN WRITTEN
        if deleteFile:
            os.remove(filepath)

        self.log.debug(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None

312 

313 # use the tab-trigger below for new method 

314 # xt-class-method 

315 

# run the command-line entry point only when this file is executed as a script
if __name__ == "__main__":
    main()