# mapmeld/InstaScan.py

## InstaScan.py
# InstaScan.py
# prints a CSV of all geolocated Instagram photos with a certain tag between dates

import json
import urllib
import datetime

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
latestprint = datetime.datetime(2012, 11, 11) # Nov 11, 2012
earliestprint = datetime.datetime(2012, 10, 28) # Oct 28, 2012
# NOTE(review): this credential is committed in source; consider loading it
# from the environment or a config file that is not checked in.
access_token = "143532409.1fb234f.9abac8ccd7324f6cbc33fb6e2a14a4ea"
tagname = "sandy"


def fetch_page(tagname, max_id, access_token):
    """Return the raw response body for one page of recent media for a tag.

    tagname      -- tag to query (inserted into the URL as-is, not escaped)
    max_id       -- pagination cursor; "" requests the first page
    access_token -- API access token appended to the query string

    Network errors raised by ``urlopen`` propagate to the caller.
    """
    # Imported locally so the script runs on both Python 2 and Python 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = ("https://api.instagram.com/v1/tags/" + tagname
           + "/media/recent?max_id=" + max_id
           + "&access_token=" + access_token)
    response = urlopen(url)
    try:
        body = response.read()
    finally:
        response.close()
    # Python 3 returns bytes; decode so printing and parsing see text.
    if not isinstance(body, str):
        body = body.decode("utf-8", "replace")
    return body


def has_location(photo):
    """Return True when *photo* carries both a latitude and a longitude."""
    location = photo.get("location")
    if not location:
        return False
    return (location.get("latitude") is not None
            and location.get("longitude") is not None)


def format_csv_row(photo):
    """Return one CSV line (no trailing newline) for a geolocated photo.

    Columns: quoted id, created_time with "000" appended, latitude,
    longitude, quoted Instagram link, quoted standard-resolution image URL.
    """
    latitude = str(photo["location"]["latitude"])
    longitude = str(photo["location"]["longitude"])
    return ("\"" + (photo["id"] or "") + "\","
            + (photo["created_time"] or "") + "000,"
            + latitude + "," + longitude
            + ",\"" + (photo["link"] or "") + "\",\""
            + (photo["images"]["standard_resolution"]["url"] or "") + "\"")


def scan_rows(tagname, access_token, earliest, latest, fetch=fetch_page,
              max_retries=5, retry_delay=2.0):
    """Yield a CSV line for every geolocated photo strictly between the bounds.

    Pages are requested through *fetch* until the date derived from the
    pagination cursor is no later than *earliest*, the response has no
    further cursor, or the response is not a usable result page.

    tagname, access_token -- passed through to *fetch*
    earliest, latest      -- naive datetimes; photos must fall strictly between
    fetch                 -- callable(tagname, max_id, access_token) -> body text
    max_retries           -- consecutive unparseable responses tolerated
    retry_delay           -- seconds to wait before re-requesting a page
    """
    import time

    max_id = ""
    createdate = datetime.datetime.now()
    failures = 0

    while createdate > earliest:
        apiout = fetch(tagname, max_id, access_token)

        try:
            photopage = json.loads(apiout)
        except ValueError:
            # Not JSON: show what came back and retry the same page, but only
            # a bounded number of times so a persistent failure cannot spin
            # forever against the API.
            print(apiout)
            failures += 1
            if failures > max_retries:
                print("giving up after " + str(failures) + " unparseable responses")
                break
            if retry_delay > 0:
                time.sleep(retry_delay)
            continue
        failures = 0

        # A payload without "data" (for example an API error object) cannot be
        # paged any further; show it and stop.
        if not isinstance(photopage, dict) or "data" not in photopage:
            print(apiout)
            break

        for photo in photopage["data"]:
            taken = datetime.datetime.fromtimestamp(int(photo["created_time"]))
            if earliest < taken < latest and has_location(photo):
                yield format_csv_row(photo)

        # The last page carries no cursor; stop instead of raising KeyError.
        next_max_id = (photopage.get("pagination") or {}).get("next_max_tag_id")
        if not next_max_id:
            break
        max_id = str(next_max_id)
        print("max_id = " + max_id)
        # The cursor divided by 1000 is treated as a Unix timestamp in seconds.
        # NOTE(review): presumably the cursor is a millisecond timestamp --
        # confirm against the API before relying on this stop condition.
        createdate = datetime.datetime.fromtimestamp(int(max_id) // 1000)


def main():
    """Print the matching photos as CSV and write them to sandygram.csv."""
    print("ID, CREATED_TIME, LATITUDE, LONGITUDE, INSTAGRAM URL, PHOTO URL")
    with open('sandygram.csv', 'w') as csvout:
        csvout.write("ID,CREATED_TIME,LATITUDE,LONGITUDE,INSTAGRAM_URL,PHOTO_URL\n")
        for row in scan_rows(tagname, access_token, earliestprint, latestprint):
            print(row)
            csvout.write(row + "\n")


if __name__ == "__main__":
    main()