ThomasG77/get_ortho_nantes.py

## get_ortho_nantes.py
#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import requests, re, os

def download_file(url, fileName=None):
    """Download ``url`` into the current directory and return the file name.

    Args:
        url: Address of the resource to fetch.
        fileName: Name to save the download under.  When omitted, the name
            is taken from the ``Content-Disposition`` response header and,
            failing that, from the last ``/``-separated segment of ``url``.

    Returns:
        The name of the written file, or ``None`` when the request failed,
        the file could not be written, or no file name could be determined.
    """
    def getFileName(url, req):
        """Guess a file name from the response headers, else from the URL."""
        # ``get`` returns None for a missing header, so no separate
        # presence test (the removed ``has_key``) is needed.
        disposition = req.headers.get("content-disposition") or ""
        candidates = re.findall(r"filename=(\S+)", disposition)
        if candidates:
            # Drop surrounding quotes and a trailing parameter separator,
            # e.g. ``filename="tiles.zip";``.
            name = candidates[-1].strip("\"';")
        else:
            name = url
        # Keep only the last path segment, so ``a/b.zip`` becomes ``b.zip``.
        return name.split("/")[-1]

    try:
        r = requests.get(url)
        # Treat HTTP error statuses as failures instead of saving the
        # server's error page under the expected file name.
        r.raise_for_status()
        # Honour an explicit fileName; only guess when none was given.
        if fileName is None:
            fileName = getFileName(url, r)
        if not fileName:
            print("You must set a fileName, unable to auto retrieve it")
            return None
        with open(fileName, 'wb') as f:
            f.write(r.content)
        return fileName
    except (requests.RequestException, IOError, OSError) as err:
        # Best effort: report network and file-system failures and let the
        # caller decide what to do with the None result.
        print("You got an error: %s" % err)
        return None

def extract(zipfilepath, extractiondir):
    """Extract all files and directories of a zip archive.

    Args:
        zipfilepath: Path of the zip archive to read.
        extractiondir: Directory the archive members are extracted into.
    """
    import zipfile
    # The context manager closes the archive handle even when extraction
    # raises, so the caller can delete the archive afterwards.
    with zipfile.ZipFile(zipfilepath) as archive:
        archive.extractall(path=extractiondir)


# Path for downloading
download = os.getcwd()
os.chdir(download)

# Nantes csv list
url_csv_ortho_nantes = "https://gitorious.org/sharedscrapers/orthonantes/blobs/raw/master/data/orthonantes.csv"
file_ortho = download_file(url_csv_ortho_nantes)
if file_ortho is None:
    # download_file signals a failure by returning None; without the list
    # of archives there is nothing left to do.
    raise SystemExit("Unable to download " + url_csv_ortho_nantes)

import csv

# Read the csv content and collect the url column.
# Each data row is expected to hold: city; url; x; y; (empty trailing field).
urls = []
# "rb" is the mode the Python 2 csv module expects for its input file.
with open(file_ortho, "rb") as f:
    reader = csv.reader(f, delimiter=";")
    for index, value in enumerate(reader):
        # Skip the header line, and any row too short to carry a url
        # (for example a blank line at the end of the file).
        if index != 0 and len(value) > 1 and value[1]:
            urls.append(value[1])

# Remove duplicate urls
urls = list(set(urls))

for url in urls:
    filezip = download_file(url)
    if filezip is None:
        # The download failed and was already reported; try the next one.
        continue
    extract(filezip, ".")
    # Delete the archive once its content has been unzipped
    os.remove(filezip)
	#!/usr/bin/env python
	# -- coding: UTF-8 --

	import requests, re, os

	def download_file(url, fileName=None):
		"""Download ``url`` into the current directory and return the file name.

		Args:
			url: Address of the resource to fetch.
			fileName: Name to save the download under.  When omitted, the
				name is taken from the ``Content-Disposition`` response
				header and, failing that, from the last ``/``-separated
				segment of ``url``.

		Returns:
			The name of the written file, or ``None`` when the request
			failed, the file could not be written, or no file name could
			be determined.
		"""
		def getFileName(url, req):
			"""Guess a file name from the response headers, else from the URL."""
			# ``get`` returns None for a missing header, so no separate
			# presence test (the removed ``has_key``) is needed.
			disposition = req.headers.get("content-disposition") or ""
			candidates = re.findall(r"filename=(\S+)", disposition)
			if candidates:
				# Drop surrounding quotes and a trailing parameter
				# separator, e.g. ``filename="tiles.zip";``.
				name = candidates[-1].strip("\"';")
			else:
				name = url
			# Keep only the last path segment: ``a/b.zip`` -> ``b.zip``.
			return name.split("/")[-1]

		try:
			r = requests.get(url)
			# Treat HTTP error statuses as failures instead of saving the
			# server's error page under the expected file name.
			r.raise_for_status()
			# Honour an explicit fileName; only guess when none was given.
			if fileName is None:
				fileName = getFileName(url, r)
			if not fileName:
				print("You must set a fileName, unable to auto retrieve it")
				return None
			with open(fileName, 'wb') as f:
				f.write(r.content)
			return fileName
		except (requests.RequestException, IOError, OSError) as err:
			# Best effort: report network and file-system failures and let
			# the caller decide what to do with the None result.
			print("You got an error: %s" % err)
			return None

	def extract(zipfilepath, extractiondir):
		"""Extract all files and directories of a zip archive.

		Args:
			zipfilepath: Path of the zip archive to read.
			extractiondir: Directory the archive members are extracted into.
		"""
		import zipfile
		# The context manager closes the archive handle even when
		# extraction raises, so the caller can delete the archive afterwards.
		with zipfile.ZipFile(zipfilepath) as archive:
			archive.extractall(path=extractiondir)


	# Path for downloading
	download = os.getcwd()
	os.chdir(download)

	# Nantes csv list
	url_csv_ortho_nantes = "https://gitorious.org/sharedscrapers/orthonantes/blobs/raw/master/data/orthonantes.csv"
	file_ortho = download_file(url_csv_ortho_nantes)
	if file_ortho is None:
		# download_file signals a failure by returning None; without the
		# list of archives there is nothing left to do.
		raise SystemExit("Unable to download " + url_csv_ortho_nantes)

	import csv

	# Read the csv content and collect the url column.
	# Each data row is expected to hold: city; url; x; y; (empty trailing field).
	urls = []
	# "rb" is the mode the Python 2 csv module expects for its input file.
	with open(file_ortho, "rb") as f:
		reader = csv.reader(f, delimiter=";")
		for index, value in enumerate(reader):
			# Skip the header line, and any row too short to carry a url
			# (for example a blank line at the end of the file).
			if index != 0 and len(value) > 1 and value[1]:
				urls.append(value[1])

	# Remove duplicate urls
	urls = list(set(urls))

	for url in urls:
		filezip = download_file(url)
		if filezip is None:
			# The download failed and was already reported; try the next one.
			continue
		extract(filezip, ".")
		# Delete the archive once its content has been unzipped
		os.remove(filezip)