Skip to content

Instantly share code, notes, and snippets.

@Kalli
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kalli/790e7dc52953383838d5 to your computer and use it in GitHub Desktop.
Save Kalli/790e7dc52953383838d5 to your computer and use it in GitHub Desktop.
Find Facebook newsfeed posts related to the 10-influential-records meme and parse the record and artist titles from them.
# -*- coding: utf-8 -*-
# NOTE(review): Python 2 script — `urlparse` and the bare `print` statements
# further down are py2-only.
import facebook
import datetime
import urlparse
import json
import traceback
import time
import re
# get an auth token with feed reading permissions from https://developers.facebook.com/tools/explorer/
# "$AUTH_TOKEN" is a placeholder — substitute a real token before running.
graph = facebook.GraphAPI("$AUTH_TOKEN")
# set up dates, 15th of september was roughly when this trend reignited in Iceland
startdate = datetime.datetime.strptime('2014-09-15', "%Y-%m-%d").date()
# `postdate` tracks the date of the newest post on the current feed page;
# it starts at today and moves backwards as we page through the feed.
postdate = datetime.datetime.now().date()
# output file for the raw matched posts (note: absolute path at filesystem root)
f = open("/fbdata.json", "w")
def post_match(post):
    """Heuristically decide whether a feed post looks like a 10-influential-records list.

    A post matches when it contains one of the meme keywords (Icelandic or
    English), or when it is shaped like a long "Artist - Album" list, or when
    it has more than ten numbered lines ("1." / "2)" / "3," style).

    :param post: Graph API post dict; only its "message" key is read.
    :returns: True if the post looks like the meme, False otherwise.
    """
    message = post["message"]
    # keywords seen in Icelandic and English variants of the meme
    keywords = [u"plötu", "influential albums", u"breiðskífur", "geisladiskar"]
    if any(word in message for word in keywords):
        return True
    # many lines and many dashes: probably an "Artist - Album" list
    if message.count("\n") > 9 and message.count("-") > 9:
        return True
    # numbered-list shape: lines starting with a number and "." / ")" / ","
    return len(re.findall(r"\n\d+(\.|\)|,)", message)) > 10
# Walk the newsfeed backwards in time from "now", collecting meme-matching
# posts, until the page we are on is older than `startdate`.
# set the time offset to the current time
until = int(time.time())
data = []
try:
    while postdate > startdate:
        print(postdate)
        # get the 25 newsfeed posts leading up til the date
        feed = graph.get_object("me/home", until=until)
        # parse the feed data we've loaded
        for post in feed["data"]:
            if "message" in post and post_match(post):
                print("found record related post from " + post["from"]["name"])
                entry = {"name": post["from"]["name"], "message": post["message"]}
                data.append(entry)
        # pull the `until` cursor for the next (older) page out of the paging URL
        until = urlparse.parse_qs(urlparse.urlparse(feed["paging"]["next"]).query)["until"][0]
        # date of the newest post on the page just fetched (ISO date prefix of created_time)
        postdate = datetime.datetime.strptime(feed["data"][0]["created_time"][:10], "%Y-%m-%d").date()
except Exception:
    # deliberate top-level boundary: report how far we got and keep partial data
    print("Last data fetched: " + str(until))
    print(traceback.format_exc())
f.write(json.dumps(data))
f.close()  # flush and release the output file; the original leaked this handle
import json
import re
albums = []
f = open("/fbdata-parsed.csv", "w")
with open("/fbdata.json") as data_file:
data = json.load(data_file)
for post in data:
for line in post["message"].split("\n"):
if '-' in line:
line = line.replace(u'•', "")
line = re.sub("^\d+(\.|\)|,)", "", line) # remove numbers from start of lines
line = line.title().strip()
l = line.split("-")[0] + "\t" + line.split("-")[1]
l = l.strip() + "\n"
albums.append(l)
for l in sorted(albums):
f.write(l.encode("UTF-8"))
f.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment