Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/local/bin/python 

2# encoding: utf-8 

3""" 

4*Try and extract the name of the document located at the given URL* 

5 

6:Author: 

7 David Young 

8""" 

9from builtins import str 

10import sys 

11import os 

12os.environ['TERM'] = 'vt100' 

13from fundamentals import tools 

14 

def extract_filename_from_url(log, url):
    """
    *get the filename from a URL.*

    *Will return 'untitled.html', if no filename is found.*

    Any query string (``?...``) or fragment (``#...``) is ignored, so
    ``https://example.com/paper.pdf?download=1`` gives ``paper.pdf``.
    If the last path segment has no ``.`` in it, ``.html`` is appended.

    **Key Arguments**

    - ``log`` -- logger (must provide ``debug`` and ``warning`` methods)
    - ``url`` -- the url to extract filename from

    Returns:
        - ``filename`` -- the filename, or ``None`` if extraction raised an
          exception (e.g. ``url`` is not a string); a warning is logged in
          that case.

    **Usage**

    ```python
    from fundamentals.download import extract_filename_from_url
    name = extract_filename_from_url(
        log=log,
        url="https://en.wikipedia.org/wiki/Docstring"
    )
    print(name)
    # OUT: Docstring.html
    ```
    """
    import re

    # EXTRACT THE FILENAME FROM THE URL
    try:
        log.debug("extracting filename from url " + url)

        # DROP THE FRAGMENT AND QUERY STRING FIRST - OTHERWISE THE END-ANCHORED
        # PATTERN BELOW WOULD PICK UP THE TAIL OF THE QUERY (E.G. '1' FROM
        # '?download=1') INSTEAD OF THE DOCUMENT NAME
        path = url.split("#", 1)[0].split("?", 1)[0]

        # TRAILING RUN OF FILENAME CHARACTERS (WORD CHARACTERS, '.', '-').
        # THE PATTERN CAN MATCH THE EMPTY STRING, SO A MATCH IS ALWAYS FOUND
        filename = re.search(r"[\w.\-]*$", path).group(0)

        if not filename:
            # URL ENDS IN A SEPARATOR (E.G. A TRAILING '/') - NOTHING TO USE
            filename = 'untitled.html'
        elif "." not in filename:
            # NO EXTENSION PRESENT - ASSUME AN HTML DOCUMENT
            filename = filename + '.html'
    except Exception as e:
        # BEST-EFFORT: REPORT THE PROBLEM AND LET THE CALLER HANDLE `None`
        filename = None
        log.warning("could not extracting filename from url : " + str(e) + "\n")

    return filename