Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dnwk/bd1671ab4d79307809229b97cb89e5b7 to your computer and use it in GitHub Desktop.
Save dnwk/bd1671ab4d79307809229b97cb89e5b7 to your computer and use it in GitHub Desktop.
Python script to download the URLs listed in a column of a CSV file
#!/usr/bin/python
import os
import sys
import urllib
import csv
# urlretrieve moved to urllib.request in Python 3; import it locally so the
# script runs unchanged on both interpreter lines.
try:
    from urllib.request import urlretrieve
except ImportError:
    from urllib import urlretrieve


def _find_column(header, column_name):
    """Return the index of column_name in header, or None when it is absent.

    When the name occurs more than once the last occurrence wins.
    """
    found = None
    for index, name in enumerate(header):
        if name == column_name:
            found = index
    return found


def _download(url, destination, attempts=2):
    """Fetch url into the file destination and return True on success.

    The fetch is tried up to `attempts` times so that a single transient
    failure does not lose the file. Any ordinary exception counts as a failed
    attempt; KeyboardInterrupt and SystemExit are left to propagate.
    """
    for _ in range(attempts):
        try:
            urlretrieve(url, destination)
        except Exception:
            continue
        return True
    return False


def main(argv=None):
    """Download every URL found in one column of a CSV file.

    argv holds the command-line arguments without the program name (defaults
    to sys.argv[1:]): the CSV file path, then the name of the column holding
    the URLs. The first CSV row is read as the header. A cell may hold several
    URLs separated by newlines. Files are written to the directory
    "<csv path up to '.csv'>-<column name>" and are named after the first
    column of the row plus ".jpg"; additional URLs from the same cell get a
    "-2", "-3", ... suffix so they do not overwrite each other.

    Returns 0 once the file has been processed (individual download failures
    are only reported), or 1 when the arguments are missing or the column
    name is not in the header.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        filename = argv[0]
        url_name = argv[1]
    except IndexError:
        print("\nERROR: Please specify filename and url column name to download\n")
        print("Usage:")
        print(" $ picodash_export_url_download.py data.csv image_url\n")
        print("- First param should be the csv file path")
        print("- Second param should be the column name that has image urls to download\n")
        return 1

    # The output directory depends only on the arguments, so compute it once.
    directory = filename.split('.csv')[0] + "-" + url_name

    with open(filename, 'r') as csvfile:
        url_column = None
        for row_index, row in enumerate(csv.reader(csvfile)):
            # The first row is the header: locate the requested column in it.
            if row_index == 0:
                url_column = _find_column(row, url_name)
                if url_column is None:
                    print("\nERROR: url column name '" + url_name + "' not found, available options:")
                    for col in row:
                        print(" " + col)
                    print("\nUsage:")
                    print(" $ picodash_export_url_download.py data.csv image_url\n")
                    return 1
                continue

            label = "[" + str(row_index) + "] "

            # Blank or truncated rows have no cell in the URL column.
            if len(row) <= url_column:
                print(label + "No " + url_name)
                continue

            url_number = 0
            for image_url in row[url_column].split('\n'):
                # Drop stray whitespace such as a trailing '\r'.
                image_url = image_url.strip()
                if not image_url:
                    print(label + "No " + url_name)
                    continue

                # The first column names the file; the extension is hard-coded.
                url_number += 1
                image_filename = row[0]
                if url_number > 1:
                    image_filename += "-" + str(url_number)
                image_filename += ".jpg"

                # Create the directory lazily, only once there is something to save.
                if not os.path.exists(directory):
                    os.makedirs(directory)

                if _download(image_url, os.path.join(directory, image_filename)):
                    print(label + "Image saved: " + image_filename)
                else:
                    print(label + "Could not download url: " + image_url)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment