# /get_spanish_links.py

## get_spanish_links.py
import requests
from lxml import etree

# Scheme and host that get_contenido_links() prepends to relative links.
root = 'http://www.agenciatributaria.es'

# Path (relative to ``root``) of the page the crawl starts from.
homepage = (
    '/AEAT.internet/Inicio/La_Agencia_Tributaria'
    '/Memorias_y_estadisticas_tributarias/Estadisticas'
    '/Comercio_exterior/Datos_estadisticos'
    '/Descarga_de_Datos_Estadisticos'
    '/Descarga_de_datos_mensuales_maxima_desagregacion_en_Euros__centimos_'
    '/Descarga_de_datos_mensuales_maxima_desagregacion_en_Euros__centimos_.shtml'
    '?mobileView=false'
)


def get_contenido_links(root, url):
    """Fetch a page and return the anchors inside its 'contenido' div.

    Args:
        root: Scheme and host prepended to ``url`` when ``url`` does not
            already start with 'http'.
        url: Absolute URL, or a path relative to ``root``.

    Returns:
        A list of lxml ``<a>`` elements found anywhere below a
        ``<div class="contenido">``. The list is empty when the response
        body yields no parsed document.
    """
    if not url.startswith('http'):
        url = root + url
    response = requests.get(url)
    # The second positional argument of requests.get() is ``params`` (query
    # string data), not an encoding, so passing 'utf-8' there altered the
    # requested URL. The evident intent was to decode the body as UTF-8,
    # which is declared on the response instead.
    response.encoding = 'utf-8'
    parsed = etree.HTML(response.text)
    if parsed is None:
        # etree.HTML() may return None when the body has no root element.
        return []
    return list(parsed.xpath("//div[@class='contenido']//a"))


def get_years(root, url):
    """Return the hrefs of 'contenido' links whose text is four characters long.

    Args:
        root: Scheme and host, forwarded to get_contenido_links().
        url: Page to scan, forwarded to get_contenido_links().

    Returns:
        A list of ``href`` attribute values, in document order, for every
        link whose text has exactly four characters (the name suggests these
        are year labels). Links without text or without an ``href`` are
        skipped.
    """
    hrefs = []
    for link in get_contenido_links(root, url):
        label = link.text
        # lxml reports ``text`` as None for an anchor with no leading text
        # (for instance one that only wraps a child element); calling len()
        # on it would raise TypeError.
        if label is None or len(label) != 4:
            continue
        href = link.attrib.get('href')
        if href is not None:  # an anchor without href cannot be followed
            hrefs.append(href)
    return hrefs


# Three-level crawl, levels named after the loop variables: every link
# returned by get_years() is fetched for its "month" links, each of those is
# fetched in turn, and the URL of every link on that last page is printed.
for year in get_years(root, homepage):
    for month in get_contenido_links(root, year):
        month_href = month.attrib.get('href')
        if month_href is None:
            # Anchor without href: nothing to follow.
            continue
        for zipfile in get_contenido_links(root, month_href):
            file_href = zipfile.attrib.get('href')
            if file_href is None:
                continue
            # Only relative links need the site root; an absolute link used
            # to be printed with ``root`` glued in front of it.
            if not file_href.startswith('http'):
                file_href = root + file_href
            # Parenthesised single-argument form runs on Python 2 and 3.
            print(file_href)
	import requests
	from lxml import etree

	# Scheme and host that get_contenido_links() prepends to relative links.
	root = 'http://www.agenciatributaria.es'
	# Path (relative to ``root``) of the page the crawl starts from.
	homepage = (
		'/AEAT.internet/Inicio/La_Agencia_Tributaria'
		'/Memorias_y_estadisticas_tributarias/Estadisticas'
		'/Comercio_exterior/Datos_estadisticos'
		'/Descarga_de_Datos_Estadisticos'
		'/Descarga_de_datos_mensuales_maxima_desagregacion_en_Euros__centimos_'
		'/Descarga_de_datos_mensuales_maxima_desagregacion_en_Euros__centimos_.shtml'
		'?mobileView=false'
	)


	def get_contenido_links(root, url):
		"""Fetch a page and return the anchors inside its 'contenido' div.

		Args:
			root: Scheme and host prepended to ``url`` when ``url`` does
				not already start with 'http'.
			url: Absolute URL, or a path relative to ``root``.

		Returns:
			A list of lxml ``<a>`` elements found anywhere below a
			``<div class="contenido">``. The list is empty when the
			response body yields no parsed document.
		"""
		if not url.startswith('http'):
			url = root + url
		response = requests.get(url)
		# The second positional argument of requests.get() is ``params``
		# (query string data), not an encoding, so passing 'utf-8' there
		# altered the requested URL. The evident intent was to decode the
		# body as UTF-8, which is declared on the response instead.
		response.encoding = 'utf-8'
		parsed = etree.HTML(response.text)
		if parsed is None:
			# etree.HTML() may return None when the body has no root element.
			return []
		return list(parsed.xpath("//div[@class='contenido']//a"))


	def get_years(root, url):
		"""Return the hrefs of 'contenido' links whose text is four characters long.

		Args:
			root: Scheme and host, forwarded to get_contenido_links().
			url: Page to scan, forwarded to get_contenido_links().

		Returns:
			A list of ``href`` attribute values, in document order, for
			every link whose text has exactly four characters (the name
			suggests these are year labels). Links without text or without
			an ``href`` are skipped.
		"""
		hrefs = []
		for link in get_contenido_links(root, url):
			label = link.text
			# lxml reports ``text`` as None for an anchor with no leading
			# text; calling len() on it would raise TypeError.
			if label is None or len(label) != 4:
				continue
			href = link.attrib.get('href')
			if href is not None:  # an anchor without href cannot be followed
				hrefs.append(href)
		return hrefs


	# Three-level crawl, levels named after the loop variables: every link
	# returned by get_years() is fetched for its "month" links, each of those
	# is fetched in turn, and the URL of every link on that last page is
	# printed.
	for year in get_years(root, homepage):
		for month in get_contenido_links(root, year):
			month_href = month.attrib.get('href')
			if month_href is None:
				# Anchor without href: nothing to follow.
				continue
			for zipfile in get_contenido_links(root, month_href):
				file_href = zipfile.attrib.get('href')
				if file_href is None:
					continue
				# Only relative links need the site root; an absolute link
				# used to be printed with ``root`` glued in front of it.
				if not file_href.startswith('http'):
					file_href = root + file_href
				# Parenthesised single-argument form runs on Python 2 and 3.
				print(file_href)