Coverage for fundamentals/mysql/yaml_to_database.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Take key-values from a yaml file including a tablename(s) and add them to a mysql database table*
6Usage:
7 yaml2db [-d] -s <pathToSettingsFile> <pathToYaml>
8 yaml2db [-d] --host=<host> --user=<user> --passwd=<passwd> --dbName=<dbName> <pathToYaml>
10Options:
12 pathToYaml path to a single yaml file or directory of yaml files
13 pathToSettingsFile path to a settings file with logging and database information (yaml file)
14 --host=<host> the database host
15 --user=<user> database user
16 --passwd=<passwd> database user password
17 --dbName=<dbName> name of the database to add the table content to
19 -d, --delete delete yaml file(s) once added to database
20 -h, --help show this help message
21 -v, --version show version
22 -s, --settings the settings file
23"""
24from __future__ import print_function
26from builtins import object
27import sys
28import os
29import yaml
31import re
32import glob
33import docopt
34from fundamentals import tools, times
35from fundamentals.mysql import convert_dictionary_to_mysql_table
def main(arguments=None):
    """
    *The main function used when ``yaml_to_database.py`` is run as a command-line tool*

    **Key Arguments**

    - ``arguments`` -- handed straight to ``tools`` together with the module docstring. Default *None*

    **Return**

    - None
    """
    # setup the command-line util settings
    su = tools(
        arguments=arguments,
        docString=__doc__,
        logLevel="WARNING",
        options_first=False,
        projectName=False
    )
    arguments, settings, log, dbConn = su.setup()

    # LOG EVERY CL ARGUMENT UNDER THE VARIABLE NAME IT HAS ALWAYS BEEN REPORTED AS
    for arg, val in arguments.items():
        if arg[0] == "-":
            varname = arg.replace("-", "") + "Flag"
        else:
            varname = arg.replace("<", "").replace(">", "")
        log.debug('%s = %s' % (varname, val,))

    # READ THE VALUES WE NEED STRAIGHT FROM THE PARSED ARGUMENTS. THE PREVIOUS
    # `exec`-BASED UNPACKING CANNOT CREATE FUNCTION LOCALS ON PYTHON 3 (SO
    # `pathToYaml`/`deleteFlag` RAISED A NameError) AND BROKE ON VALUES
    # CONTAINING QUOTES
    if "--dbConn" in arguments:
        dbConn = arguments["--dbConn"]
    pathToYaml = arguments["<pathToYaml>"]
    deleteFlag = arguments.get("--delete", False)

    from fundamentals.mysql import yaml_to_database

    if os.path.isfile(pathToYaml):
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn
        )
        yaml2db.add_yaml_file_content_to_database(
            filepath=pathToYaml,
            deleteFile=deleteFlag
        )
        basename = os.path.basename(pathToYaml)
        print("Content of %(basename)s added to database" %
              {"basename": basename})
    else:
        # ANYTHING THAT IS NOT A SINGLE FILE IS TREATED AS A DIRECTORY OF YAML FILES
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir=pathToYaml,
            deleteFiles=deleteFlag
        )
        yaml2db.ingest()
        print("Content of %(pathToYaml)s directory added to database" %
              {"pathToYaml": pathToYaml})

    return
class yaml_to_database(object):
    """
    *Take key-values from yaml files including a tablename(s) and add them to a mysql database table*

    **Key Arguments**

    - ``log`` -- logger
    - ``settings`` -- the settings dictionary. Default *False*
    - ``pathToInputDir`` -- path to the directory containing the yaml files that will be added to the database table(s). Default *False*
    - ``dbConn`` -- connection to database to add the content to
    - ``deleteFiles`` -- delete the yaml files once their content has been added to the database. Default *False*

    **Usage**

    To setup your logger, settings and database connections, please use the ``fundamentals`` package (`see tutorial here <http://fundamentals.readthedocs.io/en/latest/#tutorial>`_).

    To initiate a ``yaml2db`` object, use the following:

    ```python
    from fundamentals.mysql import yaml_to_database
    yaml2db = yaml_to_database(
        log=log,
        settings=settings,
        dbConn=dbConn,
        pathToInputDir="/path/to/yaml/directory",
        deleteFiles=False
    )
    ```

    And here's an example of the content in a yaml file that this ``yaml2db`` object can parse:

    ```yaml
    title: Why you should do most of your text editing in : Sublime Text | Sublime Text Tips
    url: http://sublimetexttips.com/why-you-should-do-most-of-your-text-editing-in-sublime-text/?utm_source=drip&utm_medium=email&utm_campaign=editor-proliferation
    kind: webpage
    subtype: article
    table: web_articles,podcasts
    ```

    Note the files are read with a simple ``key: value`` line matcher rather than a full yaml parser, so only flat, single-line values are picked up.
    """
    # ONE `key: value` PAIR PER MATCH; THE KEY IS EVERYTHING UP TO THE FIRST COLON
    _KEY_VALUE_REGEX = re.compile(
        r'(^|\n)(?P<key>[^\:]*)\:\s(?P<value>.*?)(\n|$)',
        flags=re.M | re.S
    )

    # Initialisation

    def __init__(
            self,
            log,
            dbConn,
            pathToInputDir=False,
            settings=False,
            deleteFiles=False

    ):
        self.log = log
        log.debug("instansiating a new 'yaml_to_database' object")
        self.settings = settings
        self.pathToInputDir = pathToInputDir
        self.dbConn = dbConn
        self.deleteFiles = deleteFiles
        # xt-self-arg-tmpx

        return None

    def ingest(self):
        """
        *ingest the contents of the directory of yaml files into a database*

        Every regular file directly inside ``pathToInputDir`` whose name contains ``yaml`` (case-insensitive) is passed to ``add_yaml_file_content_to_database``; sub-directories are not descended into.

        **Return**

        - None

        **Usage**

        To import an entire directory of yaml files into a database, use the following:

        ```python
        from fundamentals.mysql import yaml_to_database
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn,
            pathToInputDir="/path/to/yaml/directory",
            deleteFiles=False
        )
        yaml2db.ingest()
        ```
        """
        self.log.debug('starting the ``ingest`` method')

        for d in os.listdir(self.pathToInputDir):
            candidate = os.path.join(self.pathToInputDir, d)
            if os.path.isfile(candidate) and "yaml" in d.lower():
                self.add_yaml_file_content_to_database(
                    filepath=candidate,
                    deleteFile=self.deleteFiles
                )

        self.log.debug('completed the ``ingest`` method')
        return None

    def add_yaml_file_content_to_database(
            self,
            filepath,
            deleteFile=False
    ):
        """*given a path to a yaml file, add the yaml file content to the database*

        The file must contain a ``table`` key holding one or more comma-separated table names; the remaining key-values (plus an ``original_yaml_path`` entry) are written to each of those tables. If a ``url`` key is present, redirects are followed so the final URL is stored, and ``url`` is used as the unique key.

        **Key Arguments**

        - ``filepath`` -- the path to the yaml file
        - ``deleteFile`` -- delete the yaml file when its content has been added to the database. Default *False*

        **Return**

        - None

        **Raises**

        - ``IOError`` -- if the file cannot be opened

        **Usage**

        To parse and import the contents of a single yaml file into the database, use the following:

        ```python
        from fundamentals.mysql import yaml_to_database
        # PARSE YAML FILE CONTENTS AND ADD TO DATABASE
        yaml2db = yaml_to_database(
            log=log,
            settings=settings,
            dbConn=dbConn
        )
        yaml2db.add_yaml_file_content_to_database(
            filepath="/path/to/file.yaml",
            deleteFile=True
        )
        ```
        """
        self.log.debug(
            'starting the ``add_yaml_file_content_to_database`` method')

        import codecs

        try:
            self.log.debug("attempting to open the file %s" % (filepath,))
            # THE CONTEXT MANAGER CLOSES THE FILE EVEN IF THE READ FAILS
            with codecs.open(filepath, encoding='utf-8', mode='r') as readFile:
                thisData = readFile.read()
        except IOError:
            message = 'could not open the file %s' % (filepath,)
            self.log.critical(message)
            raise IOError(message)

        yamlContent = self._parse_key_values(thisData)

        if "table" not in yamlContent:
            # NOTHING IS WRITTEN (AND THE FILE IS LEFT IN PLACE) WITHOUT A TARGET TABLE
            self.log.warning(
                'A table value is need in the yaml content to indicate which database table to add the content to: %(filepath)s' % {"filepath": filepath})
            return None

        # NOTE THERE MAY BE MORE THAN ONE DATABASE TABLE. BLANK NAMES (E.G. FROM
        # A TRAILING COMMA) ARE DROPPED
        dbTables = [
            t.strip() for t in yamlContent.pop("table").split(",") if t.strip()
        ]

        # UNSHORTEN URL
        if "url" in yamlContent:
            yamlContent["url"] = self._resolve_url(yamlContent["url"])
            uniqueKeyList = ["url"]
        else:
            uniqueKeyList = []

        yamlContent["original_yaml_path"] = filepath

        for t in dbTables:
            convert_dictionary_to_mysql_table(
                dbConn=self.dbConn,
                log=self.log,
                dictionary=yamlContent,
                dbTableName=t,
                uniqueKeyList=uniqueKeyList,
                dateModified=True,
                returnInsertOnly=False,
                replace=True
            )

        # REMOVE THE FILE ONLY ONCE, AFTER EVERY TABLE HAS BEEN WRITTEN
        if deleteFile:
            os.remove(filepath)

        self.log.debug(
            'completed the ``add_yaml_file_content_to_database`` method')
        return None

    def _parse_key_values(self, text):
        """*return a dictionary of the ``key: value`` pairs found in ``text``, with one pair of matching wrapping quotes stripped from each value*"""
        content = {}
        for match in self._KEY_VALUE_REGEX.finditer(text):
            value = match.group("value")
            # THE LENGTH GUARD KEEPS EMPTY VALUES (`key: `) FROM RAISING AN IndexError
            if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
                value = value[1:-1]
            content[match.group("key")] = value
        return content

    def _resolve_url(self, url):
        """*follow any redirects for ``url`` and return the final address, or ``url`` unchanged if the lookup fails*"""
        # IMPORTED HERE SO FILES WITHOUT A URL NEVER NEED THE `requests` PACKAGE
        import requests
        import requests.packages.urllib3
        requests.packages.urllib3.disable_warnings()

        try:
            return requests.head(url, allow_redirects=True).url
        except Exception as e:
            # BEST EFFORT ONLY: A DEAD OR UNREACHABLE LINK MUST NOT BLOCK THE IMPORT
            self.log.warning(
                'could not resolve the url %s (%s), keeping the original value' % (url, e))
            return url

    # use the tab-trigger below for new method
    # xt-class-method
# run the command-line tool only when this file is executed directly, not on import
if __name__ == "__main__":
    main()