Skip to content

Instantly share code, notes, and snippets.

@manugarri
Created July 1, 2014 23:25
Show Gist options
  • Save manugarri/59b7a0e12bd5b4dafff0 to your computer and use it in GitHub Desktop.
Save manugarri/59b7a0e12bd5b4dafff0 to your computer and use it in GitHub Desktop.
Fuck Boe (requires Python and the requests library)
import requests
def get_file(url):
    """Download ``url`` and save it in the current working directory.

    The local file name is the last ``/``-separated segment of ``url``.

    Args:
        url: Absolute URL of the file to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps one unresponsive request from stalling the whole run.
    response = requests.get(url, timeout=30)
    # Fail on 4xx/5xx instead of saving an error page under a .pdf name.
    response.raise_for_status()
    filename = url.split('/')[-1]
    print(url)
    print('SAVING FILE {}'.format(filename))
    # The ``with`` block closes the file on exit; no explicit close() needed.
    with open(filename, 'wb') as out_file:
        out_file.write(response.content)
def get_url(row):
    """Turn a robots.txt ``Disallow`` line into an absolute boe.es URL.

    Args:
        row: One line of robots.txt text, e.g. ``'Disallow: /some/file.pdf'``.

    Returns:
        ``'http://www.boe.es'`` followed by the path taken from ``row``, or
        ``None`` (after printing the error) when ``row`` cannot be handled
        as text.
    """
    try:
        # Drop the directive prefix and any stray surrounding whitespace.
        path = row.replace('Disallow: ', '').strip()
        return 'http://www.boe.es' + path
    except (AttributeError, TypeError) as e:
        # Best effort: report the bad row and let the caller carry on.
        print('{}   {}'.format(e, row))
        return None
# NOTE(review): this file defines get_url twice; being the later definition,
# this is the one bound at runtime. The duplicate can probably be removed.
def get_url(row):
    """Build the absolute boe.es URL for a robots.txt ``Disallow`` line.

    Args:
        row: One line of robots.txt text, e.g. ``'Disallow: /some/file.pdf'``.

    Returns:
        ``'http://www.boe.es'`` plus the path found in ``row``, or ``None``
        (after printing the error) when ``row`` cannot be handled as text.
    """
    try:
        # Remove the directive prefix, then trim leftover whitespace.
        path = row.replace('Disallow: ', '').strip()
    except (AttributeError, TypeError) as e:
        # Best effort: report the bad row and let the caller carry on.
        print('{}   {}'.format(e, row))
        return None
    return 'http://www.boe.es' + path
def download_interesting_file(row):
    """Download the file named by a robots.txt line if it mentions 'pdf'.

    Lines that do not contain ``'pdf'`` are ignored. Any failure while
    building the URL or downloading is printed and otherwise swallowed, so
    one bad line does not stop the remaining downloads.

    Args:
        row: One line of robots.txt text.

    Returns:
        Always ``None``.
    """
    if 'pdf' not in row:
        return None
    try:
        url = get_url(row)
        # get_url reports its own failure and returns None; nothing to fetch.
        if url is None:
            return None
        get_file(url)
    except Exception as e:
        # Single-argument print calls behave the same on Python 2 and 3
        # (a two-argument call would print a tuple on Python 2).
        print('ERROR {}'.format(row))
        print(e)
    return None
def fuck_boe(limit=100):
    """Download the PDF files listed in boe.es's robots.txt.

    Fetches robots.txt and hands each of its first ``limit`` lines to
    ``download_interesting_file``.

    Args:
        limit: Maximum number of robots.txt lines to examine, counted from
            the top of the file. Defaults to 100; ``None`` examines every
            line.

    Raises:
        requests.RequestException: If robots.txt itself cannot be fetched.
    """
    amazing_url = 'http://www.boe.es/robots.txt'
    response = requests.get(amazing_url, timeout=30)
    response.raise_for_status()
    # Use the decoded text: on Python 3 ``.content`` is bytes, and the
    # ``'pdf' in row`` substring test downstream needs text lines.
    lines = response.text.splitlines()
    # An explicit loop, because map() is lazy on Python 3 and would never
    # actually trigger the downloads.
    for line in lines[:limit]:
        download_interesting_file(line)
# Script entry point: start the download run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    fuck_boe()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment