Skip to content

Instantly share code, notes, and snippets.

@mapmeld
Created November 28, 2012 15:30
Show Gist options
  • Save mapmeld/4161980 to your computer and use it in GitHub Desktop.
Save mapmeld/4161980 to your computer and use it in GitHub Desktop.
Scrape Instagram geo-photos with a certain tag and date range
# InstaScan.py
# prints a CSV (and saves it to sandygram.csv) of all geolocated Instagram photos with a certain tag between two dates
import datetime
import json
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2
createdate = datetime.datetime.now()
latestprint = datetime.datetime(2012, 11, 11) # Nov 11, 2012
earliestprint = datetime.datetime(2012, 10, 28) # Oct 28, 2012
access_token = "143532409.1fb234f.9abac8ccd7324f6cbc33fb6e2a14a4ea"
max_id = ""
tagname = "sandy"
csvout = open('sandygram.csv', 'w')
print "ID, CREATED_TIME, LATITUDE, LONGITUDE, INSTAGRAM URL, PHOTO URL"
csvout.write("ID,CREATED_TIME,LATITUDE,LONGITUDE,INSTAGRAM_URL,PHOTO_URL\n")
while(createdate > earliestprint):
apiout = urllib.urlopen("https://api.instagram.com/v1/tags/" + tagname + "/media/recent?max_id=" + max_id + "&access_token=" + access_token).read()
photopage = None
try:
photopage = json.loads( apiout )
except:
print apiout
continue
photos = photopage["data"]
for photo in photos:
createdate = datetime.datetime.fromtimestamp(int(photo["created_time"]) )
#print createdate
if(createdate < latestprint and createdate > earliestprint):
# inside timestamp bounds
if((photo.has_key("location") == True) and (photo["location"] is not None) and (photo["location"].has_key("latitude") == True)):
# has a location
latitude = str(photo["location"]["latitude"])
longitude = str(photo["location"]["longitude"])
# print out CSV format
print "\"" + (photo["id"] or "") + "\"," + (photo["created_time"] or "") + "000," + latitude + "," + longitude + ",\"" + (photo["link"] or "") + "\",\"" + (photo["images"]["standard_resolution"]["url"] or "") + "\""
csvout.write("\"" + (photo["id"] or "") + "\"," + (photo["created_time"] or "") + "000," + latitude + "," + longitude + ",\"" + (photo["link"] or "") + "\",\"" + (photo["images"]["standard_resolution"]["url"] or "") + "\"\n")
max_id = photopage["pagination"]["next_max_tag_id"]
print "max_id = " + max_id
createdate = datetime.datetime.fromtimestamp( int( int( max_id ) / 1000 ) )
@zachwill
Copy link

zachwill commented Dec 2, 2012

@remmi11
Copy link

remmi11 commented Aug 7, 2017

I'm receiving: KeyError: 'next_max_id' - Instagram
What am I missing?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment