Last active
August 29, 2015 14:07
-
-
Save Kalli/790e7dc52953383838d5 to your computer and use it in GitHub Desktop.
Find Facebook newsfeed posts related to the "10 influential records" meme and parse the record and artist titles from them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
import facebook
import datetime
import urlparse
import json
import traceback
import time
import re

# Get an auth token with feed-reading permissions from
# https://developers.facebook.com/tools/explorer/ and paste it below
# in place of the placeholder.
graph = facebook.GraphAPI("$AUTH_TOKEN")

# Date range for the crawl: 15 September 2014 is roughly when the
# "10 influential records" trend reignited in Iceland.
startdate = datetime.date(2014, 9, 15)
postdate = datetime.datetime.now().date()

# Output file for the raw matched posts (written once, at the end of
# the crawl loop below).
f = open("/fbdata.json", "w")
def post_match(post):
    """Heuristically decide whether a feed post is a "10 influential
    records" list.

    A post matches when its message contains one of the known Icelandic
    or English meme keywords, or looks like a long dash-separated list
    (10+ line breaks and 10+ dashes), or contains more than ten lines
    that start with an enumeration like "1." / "2)" / "3,".

    post -- dict with at least a "message" key holding the post text.
    Returns True for a match, False otherwise.
    """
    message = post["message"]
    # Icelandic and English phrases that typically introduce the meme.
    keywords = [u"plötu", "influential albums", u"breiðskífur", "geisladiskar"]
    if any(word in message for word in keywords):
        return True
    # "Artist - Album" style lists: at least 10 lines and 10 dashes.
    if message.count("\n") > 9 and message.count("-") > 9:
        return True
    # Numbered lists. Raw string so "\d" reaches the regex engine intact
    # instead of relying on it surviving string-escape parsing.
    return len(re.findall(r"\n\d+(\.|\)|,)", message)) > 10
# set the time offset to the current time | |
until = int(time.time()) | |
data = [] | |
try: | |
while postdate > startdate: | |
print postdate | |
# get the 25 newsfeed posts leading up til the date | |
feed = graph.get_object("me/home", **{"until":until}) | |
# parse the feed data we've loaded | |
for post in feed["data"]: | |
if "message" in post and post_match(post): | |
print "found record related post from " + post["from"]["name"] | |
entry = {"name": post["from"]["name"], "message": post["message"]} | |
data.append(entry) | |
# get the parameters for the next round of data | |
until = urlparse.parse_qs(urlparse.urlparse(feed["paging"]["next"]).query)["until"][0] | |
postdate = datetime.datetime.strptime(feed["data"][0]["created_time"][:10], "%Y-%m-%d").date() | |
except Exception: | |
print "Last data fetched: "+str(until) | |
print(traceback.format_exc()) | |
f.write(json.dumps(data)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import re

# Parse the raw posts dumped by the collector into a tab-separated
# "Artist<TAB>Album" listing, one record per line, sorted alphabetically.
albums = []
with open("/fbdata.json") as data_file:
    data = json.load(data_file)

for post in data:
    for line in post["message"].split("\n"):
        # Only "Artist - Album" style lines are of interest.
        if '-' in line:
            line = line.replace(u'\u2022', "")
            # Strip leading enumeration such as "1.", "2)" or "3,".
            # Raw string keeps the "\d" escape out of string parsing.
            line = re.sub(r"^\d+(\.|\)|,)", "", line)
            line = line.title().strip()
            l = line.split("-")[0] + "\t" + line.split("-")[1]
            albums.append(l.strip() + "\n")

# Open the output only once the rows exist, and let the context manager
# close (and flush) it.
with open("/fbdata-parsed.csv", "w") as f:
    f.writelines(l.encode("UTF-8") for l in sorted(albums))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment