Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@loisaidasam
Last active April 16, 2018 10:00
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save loisaidasam/4983518 to your computer and use it in GitHub Desktop.
Save loisaidasam/4983518 to your computer and use it in GitHub Desktop.
Foursquare Miner! Mine the Foursquare venue tips database for specific keywords and save the matching tips to a CSV file. You can then use OpenRefine (https://github.com/OpenRefine/OpenRefine) to clean up the data (if you want).
'''Mine foursquare for tips and save the results to a csv
Uses this (deprecated) API endpoint:
https://developer.foursquare.com/docs/tips/search
'''
import csv
import datetime
import json
import random
import time
import urllib2
import urllib
# Boundary of the search area, given as (lat, lon) points.
CORNERS = (
    (46.092043, 14.451485),
    (46.084186, 14.579544),
    (46.0158, 14.448738),
    (46.014847, 14.577827),
)
'''Ours (Ljubljana) look like this:
46.092043,14.451485 46.084186,14.579544
46.0158, 14.448738 46.014847,14.577827
'''
# Crude queryable area: the axis-aligned box spanned by the extreme
# latitude and longitude values found among the corners.
LAT_MIN = min(lat for lat, _ in CORNERS)
LAT_MAX = max(lat for lat, _ in CORNERS)
LON_MIN = min(lon for _, lon in CORNERS)
LON_MAX = max(lon for _, lon in CORNERS)
# Search terms sent to Foursquare, one request per term.
QUERIES = ('wifi', 'wi-fi', 'wireless')
# Pause between consecutive requests, in seconds.
SLEEP_SECS = 5
# API credentials; replace the placeholders with your own values.
CLIENT_ID = 'YOUR CLIENT ID'
CLIENT_SECRET = 'YOUR CLIENT SECRET'
def get_result(lat, lon, query):
    '''Search Foursquare tips matching a query near one point.

    Issues a single request to the v2 tips/search endpoint using the
    module-level CLIENT_ID and CLIENT_SECRET and a limit of 500, printing a
    progress line before the request and the number of tips found after it.

    Args:
        lat: latitude of the search centre.
        lon: longitude of the search centre.
        query: search term to look for in tips.

    Returns:
        The decoded JSON response; the tips are found under
        result['response']['tips'].

    Raises:
        Whatever urllib2.urlopen or json.loads raise; nothing is caught
        here. A KeyError is raised if the response lacks the
        'response' -> 'tips' keys.
    '''
    params = urllib.urlencode({
        'll': '%s,%s' % (lat, lon),
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
        'query': query,
        'limit': 500,
    })
    url = "https://api.foursquare.com/v2/tips/search?%s" % params
    print("searching for %s near %s, %s..." % (query, lat, lon))
    response = urllib2.urlopen(url)
    try:
        result = json.loads(response.read())
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()
    print("found %s results!" % len(result['response']['tips']))
    return result
def choose_lat_lon():
    '''Pick a uniformly random (lat, lon) point inside the bounding box.

    The latitude is drawn from [LAT_MIN, LAT_MAX] first, then the longitude
    from [LON_MIN, LON_MAX].
    '''
    return (
        random.uniform(LAT_MIN, LAT_MAX),
        random.uniform(LON_MIN, LON_MAX),
    )
def convert_tip_data(original_data):
    '''Return a new list with every unicode item encoded as UTF-8 bytes.

    Items that are not unicode instances are passed through unchanged and
    the original order is kept.
    '''
    return [
        value.encode('utf-8') if isinstance(value, unicode) else value
        for value in original_data
    ]
def main():
    '''Mine tips until interrupted, appending each new one to data.csv.

    Loops forever over QUERIES, searching around a fresh random point for
    every query and sleeping SLEEP_SECS after each request. Each tip not
    already seen during this run is written as one CSV row. Ctrl-C stops
    the loop cleanly; any other exception propagates after the file has
    been closed.
    '''
    header = ['tip_id', 'created_at', 'text', 'venue_id', 'venue_lat',
              'venue_lon', 'venue_name', 'venue_category']
    # A set keeps the duplicate check O(1) however many tips accumulate.
    seen_tip_ids = set()
    # Binary mode is what the Python 2 csv module requires; text mode adds
    # an extra '\r' to every row on Windows.
    with open('data.csv', 'ab') as fp_write:
        writer = csv.writer(fp_write)
        # Only write the header into an empty file, so that re-running the
        # script does not scatter header rows through the data. Seek to the
        # end explicitly: the position reported right after opening in
        # append mode is not reliable.
        fp_write.seek(0, 2)
        if fp_write.tell() == 0:
            writer.writerow(header)
        try:
            while True:
                for query in QUERIES:
                    lat, lon = choose_lat_lon()
                    data = get_result(lat, lon, query)
                    for tip in data['response']['tips']:
                        if tip['id'] in seen_tip_ids:
                            continue
                        venue = tip['venue']
                        categories = venue['categories']
                        # Only the first category is recorded, if any.
                        category = categories[0]['name'] if categories else ''
                        row_data = [
                            tip['id'],
                            tip['createdAt'],
                            tip['text'],
                            venue['id'],
                            venue['location']['lat'],
                            venue['location']['lng'],
                            venue['name'],
                            category,
                        ]
                        writer.writerow(convert_tip_data(row_data))
                        seen_tip_ids.add(tip['id'])
                    time.sleep(SLEEP_SECS)
        except KeyboardInterrupt:
            # Ctrl-C is the intended way to stop mining.
            pass
# Only start mining when executed as a script, so that importing this module
# does not kick off the endless request loop.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment