Skip to content

Instantly share code, notes, and snippets.

@MaxDragonheart
Created April 20, 2020 07:45
Show Gist options
  • Save MaxDragonheart/8ec3f59f458686af1abe827114a0ea57 to your computer and use it in GitHub Desktop.
Save MaxDragonheart/8ec3f59f458686af1abe827114a0ea57 to your computer and use it in GitHub Desktop.
Script for downloading the list of public URLs from a website
from datetime import datetime
from urllib.request import urlopen
import re
########################Python 3.6.7
def choice_one():
    """Append a report of the collected URLs to a user-named text file.

    Prompts on stdin for a base file name, appends ``.txt`` to it and opens
    that file in ``a+`` mode, so an existing file is extended rather than
    overwritten. The report contains a header (source URL, number of URLs,
    start time, duration) followed by one URL per line.

    Reads the module-level names ``url``, ``list``, ``processing_time_start``
    and ``elaboration_time``; they must be assigned before this is called.
    """
    print('Give a name at your file. The file will have extension \'.txt\':')
    # The context manager closes the file even when a write raises, which the
    # previous manual open()/close() pair did not guarantee.
    with open(str(input() + '.txt'), 'a+') as file:
        file.write('\n>>>>>URLS REPORT\n')
        file.write('In this file there are the urls from ( ' + url + ' )\n')
        file.write('Number of urls is ' + str(len(list)) + '\n')
        file.write('Process start: ' + str(processing_time_start) + '\n')
        file.write('Process duration: ' + str(elaboration_time) + '\n')
        file.write('\n###############\n\n')
        for link in list:
            file.write(str(link + '\n'))
        file.write('\n###############\n\n')
        file.write('\nEND REPORT\n')
    print('\n\nA file with all datas was created')
    print('\nMy job is end.\nSe you soon! :)\n')
# Interactive entry point: ask for a URL, download the page, list every
# quoted http(s)/ftp(s) link found in it and optionally save a report.
print('\nHello!\nType the url: ')
url = str(input())
processing_time_start = datetime.now()
print('\nUrls in download...')
# Download the page; the context manager closes the response once the body
# has been read instead of leaving the connection open.
with urlopen(url) as website:
    html = website.read()
# The body is always decoded as ISO-8859-1; the page's declared charset is
# not inspected.
html = html.decode('ISO-8859-1')
# Each match is a tuple (full_url, scheme) because the pattern has two groups.
links = re.findall('"((http|ftp)s?://.*?)"', html)
# Keep only the full URL of each match, drop duplicates and sort.
# NOTE: the name `list` shadows the builtin, but choice_one() reads this
# global by that name, so it is kept.
list = sorted({x[0] for x in links})
print('\nDone!\nSee below! ;)\n\n')
for link in list:
    print(link + '\n')
print('################# List end here')
print('################# The list contain ' + str(len(list)) + ' elements\n')
processing_time_end = datetime.now()
elaboration_time = processing_time_end - processing_time_start
print('\nI can print all urls into a file. Do you want that I do this?\n Type 1 for Yes, 0 for No')
print('\nYour choice: ')
try:
    fork = int(input())
except ValueError:
    # Non-numeric input is reported through the same error branch as any
    # other invalid choice instead of ending the script with a traceback.
    fork = None
if fork == 0:
    print('\nMy job is end.\nProcess duration: ' + str(elaboration_time) + '\n\nSe you soon! :)\n')
elif fork == 1:
    choice_one()
else:
    print('\nERROR: You can type 0 or 1 only! :(\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment