Coverage for fundamentals/download/extract_filename_from_url.py : 0%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/local/bin/python
2# encoding: utf-8
3"""
4*Try and extract the name of the document located at the given URL*
6:Author:
7 David Young
8"""
9from builtins import str
10import sys
11import os
12os.environ['TERM'] = 'vt100'
13from fundamentals import tools
def extract_filename_from_url(log, url):
    """
    *get the filename from a URL.*

    *Will return 'untitled.html', if no filename is found.*

    The filename is the trailing run of word characters, dots and hyphens
    in the URL once any query string (``?...``) and fragment (``#...``)
    have been removed. If that name contains no dot, ``.html`` is appended.

    **Key Arguments**

    - ``log`` -- logger (its ``debug`` and ``warning`` methods are called)
    - ``url`` -- the url to extract filename from

    Returns:
        - ``filename`` -- the filename, or ``None`` if the extraction raised
          an exception (e.g. ``url`` is not a string); a warning is logged
          in that case

    **Usage**

    ```python
    from fundamentals.download import extract_filename_from_url
    name = extract_filename_from_url(
        log=log,
        url="https://en.wikipedia.org/wiki/Docstring"
    )
    print(name)
    # OUT: Docstring.html
    ```
    """
    import re

    # EXTRACT THE FILENAME FROM THE URL
    try:
        log.debug("extracting filename from url " + url)

        # The query string and fragment are not part of the document name;
        # left in place they hijack the end-of-string match below
        # (e.g. "report.pdf?download=1" would yield "1.html").
        resource = url.split('#', 1)[0].split('?', 1)[0]

        # Raw string: '\w' and '\-' are invalid escapes in a normal literal.
        # '\w' already covers the underscore. Because of the '*' quantifier
        # there is always at least an (empty) match at the end of the string,
        # so indexing [0] is safe.
        trailing_name = re.compile(r'([\w.\-]*)$')
        filename = trailing_name.findall(resource)[0]

        if not filename:
            # URL ends in a separator such as '/', so there is no name to take
            filename = 'untitled.html'
        if '.' not in filename:
            # no extension present -- assume an HTML document
            filename = filename + '.html'
    except Exception as e:
        # Best-effort helper: report the problem and signal failure with None
        # rather than propagating the exception to the caller.
        filename = None
        log.warning("could not extracting filename from url : " + str(e) + "\n")

    return filename