Last active
April 16, 2018 10:00
-
-
Save loisaidasam/4983518 to your computer and use it in GitHub Desktop.
Foursquare Miner! Mine the venues tips database for some specific keywords and save them to a csv file. You can then use OpenRefine (https://github.com/OpenRefine/OpenRefine) to clean up the data (if you want).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''Mine foursquare for tips and save the results to a csv | |
Uses this (deprecated) API endpoint: | |
https://developer.foursquare.com/docs/tips/search | |
''' | |
import csv | |
import datetime | |
import json | |
import random | |
import time | |
import urllib2 | |
import urllib | |
# A list of lat/lon points that mark your boundary | |
CORNERS = ( | |
(46.092043, 14.451485), | |
(46.084186,14.579544), | |
(46.0158, 14.448738), | |
(46.014847,14.577827), | |
) | |
'''Ours (Ljubljana) look like this: | |
46.092043,14.451485 46.084186,14.579544 | |
46.0158, 14.448738 46.014847,14.577827 | |
''' | |
# Really dumb strategy for finding a queryable area | |
LAT_MIN = min(x[0] for x in CORNERS) | |
LAT_MAX = max(x[0] for x in CORNERS) | |
LON_MIN = min(x[1] for x in CORNERS) | |
LON_MAX = max(x[1] for x in CORNERS) | |
# What queries you want to hit Foursquare with | |
QUERIES = ('wifi', 'wi-fi', 'wireless') | |
# How long to sleep between requests | |
SLEEP_SECS = 5 | |
CLIENT_ID = 'YOUR CLIENT ID' | |
CLIENT_SECRET = 'YOUR CLIENT SECRET' | |
def get_result(lat, lon, query): | |
params = urllib.urlencode({ | |
'll': '%s,%s' % (lat, lon), | |
'client_id': CLIENT_ID, | |
'client_secret': CLIENT_SECRET, | |
'query': query, | |
'limit': 500, | |
}) | |
url = "https://api.foursquare.com/v2/tips/search?%s" % params | |
print "searching for %s near %s, %s..." % (query, lat, lon) | |
result = json.loads(urllib2.urlopen(url).read()) | |
print "found %s results!" % len(result['response']['tips']) | |
return result | |
def choose_lat_lon():
    '''Return a uniformly random (lat, lon) point inside the bounding box.'''
    return (random.uniform(LAT_MIN, LAT_MAX),
            random.uniform(LON_MIN, LON_MAX))
def convert_tip_data(original_data):
    '''Return a copy of `original_data` with unicode items UTF-8 encoded.

    Non-unicode items are passed through untouched, so the result is safe
    to hand to the (byte-oriented, Python 2) csv writer.
    '''
    return [item.encode('utf-8') if isinstance(item, unicode) else item
            for item in original_data]
def main():
    '''Mine Foursquare tips at random points in the bounding box forever.

    Appends one CSV row per newly seen tip to data.csv; stops on ctrl-c.
    '''
    fp_write = open('data.csv', 'a')
    try:
        writer = csv.writer(fp_write)
        writer.writerow(['tip_id', 'created_at', 'text', 'venue_id', 'venue_lat', 'venue_lon', 'venue_name', 'venue_category'])
        # Set, not list: membership tests are O(1) instead of O(n) per tip.
        seen_tip_ids = set()
        while True:
            try:
                for query in QUERIES:
                    lat, lon = choose_lat_lon()
                    data = get_result(lat, lon, query)
                    for tip in data['response']['tips']:
                        # Skip tips already written this run
                        if tip['id'] in seen_tip_ids:
                            continue
                        venue = tip['venue']
                        # Conditional expression instead of the `and/or`
                        # idiom, which would silently turn any falsy
                        # category name into '' as well.
                        categories = venue['categories']
                        category = categories[0]['name'] if categories else ''
                        row_data = [tip['id'], tip['createdAt'], tip['text'],
                                    venue['id'], venue['location']['lat'],
                                    venue['location']['lng'], venue['name'],
                                    category]
                        writer.writerow(convert_tip_data(row_data))
                        seen_tip_ids.add(tip['id'])
                    # Be polite to the API between requests
                    time.sleep(SLEEP_SECS)
            # Catch ctrl-c
            except KeyboardInterrupt:
                break
    finally:
        # Close the file even if something other than ctrl-c blows up --
        # the original only closed it on KeyboardInterrupt.
        fp_write.close()
# Guard the entry point so importing this module doesn't immediately start
# hammering the Foursquare API; running it as a script behaves as before.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment