Coverage for fundamentals/download/_fetch.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Retrieve an HTML document or file from the web at a given URL*
6:Author:
7 David Young
8"""
9from __future__ import print_function
10from future import standard_library
11standard_library.install_aliases()
12from builtins import str
13import sys
14import os
15os.environ['TERM'] = 'vt100'
16from fundamentals import tools
def _fetch(url,):
    """
    *Retrieve an HTML document or file from the web at a given URL*

    Timeouts and proxy (502) errors are retried up to 10 times in total;
    any other error aborts the download immediately.

    **Key Arguments**

    - ``url`` -- the URL of the document or file. Either a request object exposing ``get_full_url()`` or a plain URL string (both are accepted by ``urlopen``)

    **Return**

    - ``url`` -- the URL exactly as passed in, or None if an error occurred or every attempt failed
    - ``body`` -- the content returned by the response's ``read()``, or None on failure
    """
    import logging as log
    import socket
    # IMPORT THE SUBMODULE EXPLICITLY: A BARE `import urllib` DOES NOT
    # GUARANTEE THAT `urllib.request` IS LOADED
    import urllib.request

    # PLAIN STRING URLS HAVE NO get_full_url(); FALL BACK TO THE STRING
    # ITSELF SO THAT LOG MESSAGES CAN NEVER RAISE
    getFullUrl = getattr(url, "get_full_url", None)
    fullUrl = getFullUrl() if callable(getFullUrl) else str(url)

    # TRY AND DOWNLOAD X TIMES BEFORE QUITING
    tries = 10
    for _ in range(tries):
        try:
            log.debug('downloading ' + fullUrl)
            response = urllib.request.urlopen(url)
            try:
                body = response.read()
            finally:
                # ALWAYS RELEASE THE CONNECTION, EVEN IF read() FAILS
                response.close()
        except socket.timeout:
            print("timeout on URL, trying again")
        except Exception as e:
            message = str(e)
            if "[Errno 60]" in message:
                log.warning('timeout on URL, trying again')
            elif "Error 502" in message:
                log.warning('proxy error on URL, trying again')
            else:
                # NOT A RETRYABLE ERROR - GIVE UP STRAIGHT AWAY
                log.warning(
                    "could not download " + fullUrl + " : " + message + "\n")
                return None, None
        else:
            return url, body

    # EVERY ATTEMPT HIT A RETRYABLE ERROR - REPORT FAILURE LIKE ANY OTHER ERROR
    log.warning(
        "could not download " + fullUrl + " : gave up after " + str(tries) + " attempts\n")
    return None, None