Skip to content

Instantly share code, notes, and snippets.

@tfiers
Last active January 10, 2017 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save tfiers/11251129 to your computer and use it in GitHub Desktop.
Save tfiers/11251129 to your computer and use it in GitHub Desktop.
Facebook post insight & event attendees tracker
# Periodically downloads facebook events and page post insights data to a SQLite database.
from pytz import timezone
from datetime import datetime
import facebook
import sqlite3
from time import time, sleep # both in seconds
import logging
from pprint import pprint, pformat
import settings as S
# Provides:
# PAGE_ACCESS_TOKEN = str
# DATABASE_FILE = str
# UPDATE_PERIOD = int
# RETRY_PERIOD = int
# TRACKED_EVENTS = ({"name": str, "id": str}, ... )
# EVENTS_TRACKED_RSVP_STATUSES = (str, ...)
# TRACKED_PAGE_POSTS = ({"name": str, "id": str}, ... )
# PAGE_POST_TRACKED_FIELDS = ({"name": str, "metric-name": str}, ..., {"name": str, "metric-name": str, "type-name": str}, ... )
logFormatter = logging.Formatter("%(asctime)s [%(levelname)-5.5s] %(message)s")
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.INFO)
fileHandler = logging.FileHandler("fb_data_fetcher.log")
fileHandler.setFormatter(logFormatter)
rootLogger.addHandler(fileHandler)
consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logFormatter)
rootLogger.addHandler(consoleHandler)
# Make sure the recorded timestamp is correct regardless
# of where this script runs.
fmt = "%Y-%m-%d %H:%M:%S"
tz = timezone('Europe/Brussels')
def init_db():
# Creates the database file if it does not exist yet.
conn = sqlite3.connect(S.DATABASE_FILE)
# 'with' automatically releases the resource ('conn.close') "and provides error handling".
with conn:
# conn.execute("CREATE TABLE IF NOT EXISTS EventGoers("++")")
PagePostData_columns = ", ".join(["datetime DATETIME", "post_name TEXT"]+[field["name"]+" INTEGER" for field in S.PAGE_POST_TRACKED_FIELDS])
conn.execute("CREATE TABLE IF NOT EXISTS PagePostData("+PagePostData_columns+");")
conn.execute("CREATE TABLE IF NOT EXISTS EventGoers(time_registered DATETIME, event_name TEXT, name TEXT, uid TEXT, rsvp_status TEXT);")
def fetch_data():
global S
S = reload(S) # Might they have changed.
time = datetime.now(tz).strftime(fmt)
try:
graph = facebook.GraphAPI(S.PAGE_ACCESS_TOKEN)
conn = sqlite3.connect(S.DATABASE_FILE)
with conn:
for post in S.TRACKED_PAGE_POSTS:
data = graph.get_object(post["id"]+"/insights")["data"]
values = []
for field in S.PAGE_POST_TRACKED_FIELDS:
for metric in data:
if field["metric-name"] == metric["name"]:
value_node = metric["values"][0]["value"]
if "type-name" in field:
if field["type-name"] in value_node:
values.append(str(value_node[field["type-name"]]))
else:
values.append("0")
else:
values.append(str(value_node))
q = "INSERT INTO PagePostData VALUES (strftime('{}', '{}'), '{}', {});".format(fmt, time, post["name"], ", ".join(values))
conn.execute(q)
new_event_entries = 0
for event in S.TRACKED_EVENTS:
for rsvp_status in S.EVENTS_TRACKED_RSVP_STATUSES:
# Note that we do not page through the data, as all results seem to be returned on one page.
# The docs discourage doing this, but fuck the police.
data = graph.get_object("{}/{}".format(event["id"], rsvp_status))["data"]
for entry in data:
# Only insert a new record if, for the current event, the user is new or
# if the user's last status is different from the currently fetched status.
q = "SELECT rsvp_status FROM EventGoers WHERE event_name='{}' AND uid='{}' ORDER BY time_registered DESC LIMIT 1".format(
event["name"], entry["id"])
result = conn.execute(q).fetchone()
if result is None or result[0] != rsvp_status:
q = "INSERT INTO EventGoers VALUES (strftime(?, ?), ?, ?, ?, ?);"
conn.execute(q, (fmt, time, event["name"], entry["name"], entry["id"], rsvp_status))
new_event_entries += 1
conn.commit()
logging.info("{} - {}".format(time, "okey"))
logging.info("new event entries: {}".format(new_event_entries))
return "great success"
except Exception, e:
logging.exception("Something awful happened!")
return False
def fetch_periodically():
# Fetch data. Wait. Repeat.
while True:
t0 = time()
elapsed_time = 0
retries = 0
while elapsed_time < S.UPDATE_PERIOD:
if fetch_data() == "great success":
elapsed_time = time() - t0
logging.info("retries: "+str(retries))
logging.info("elapsed time: {:.3f}\n".format(elapsed_time))
# Wait until next update.
break
else:
# Wait a short bit and then retry.
sleep(S.RETRY_PERIOD)
retries += 1
elapsed_time = time() - t0
if elapsed_time < S.UPDATE_PERIOD: # Testing is so vital. (Passing a negative number to 'sleep()' makes it sleep a very long time.)
sleep(S.UPDATE_PERIOD - elapsed_time)
init_db()
fetch_periodically()
# For playing with data. Normal program flow does not reach this.
conn = sqlite3.connect(S.DATABASE_FILE)
# conn.row_factory = sqlite3.Row # Return results as dictionaries.
c = conn.cursor()
#
# GET ETERNAL PAGE ACCESS TOKEN
#
# As per here:
# https://developers.facebook.com/docs/facebook-login/access-tokens/#extendingpagetokens
# and here:
# https://developers.facebook.com/docs/facebook-login/access-tokens/#pagetokens
#
# First get a long lived access token, with the 'manage_pages' permission.
# Use get_long_lived_access_token.py
import facebook
from pprint import pprint
from datetime import datetime
PAGE_NAME = "Arenbergorkest" # Page you manage for which you desire an eternal access token.
LONG_LIVED_ACCESS_TOKEN = "CAAInC..."
graph = facebook.GraphAPI(LONG_LIVED_ACCESS_TOKEN)
r = graph.get_object("me/accounts")
# pprint(r)
for page in r["data"]:
if page["name"] == PAGE_NAME:
eternal_access_token = page["access_token"]
# Expires: Never
graph = facebook.GraphAPI(eternal_access_token)
print u'\u2713'
# # # # # # # # # #
# Arenbergorkest: #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# CAAIn... #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# For ease of use:
def obj(path):
return graph.get_object(path)
def pr(path_or_obj):
if isinstance(path_or_obj, basestring):
pprint(obj(path_or_obj))
else:
pprint(path_or_obj)
# GET LONG LIVED ACCESS TOKEN
#
# As per here:
# https://developers.facebook.com/docs/facebook-login/access-tokens/#extending
#
# This needs to be redone every 2 months.
import requests
from pprint import pprint
from datetime import datetime
# Get a user token from the Graph API Explorer (https://developers.facebook.com/tools/explorer/),
# with the 'read_insights' permission e.g. If you want to get an (eternal) page access token, also grant the
# 'manage_pages' permission.
#
# (Get info about this token - app, user, permissions and expiry - here: https://developers.facebook.com/tools/debug/)
#
short_lived_user_token = "CAAInCjiZAhnQBAIPSpXw5SZB8RZBf6tjab0ZABuJnpKo63399WQZCX76LZA5ZBoPsF8IZBQciSGrY1uFs8pZC8jhZBWiN2Qx6IOFzidFG3WVZB9VQuD8ciaLi33Eux6tZCCJEdebMXaIE6CQAZCcLh2wLkhg8PZAAy6ZB9CIkRagVfxmZBPYn2JJZBfmB8XGswlATPTB1ZCp8ZD"
# Expires in about an hour.
APP_ID = ""
APP_SECRET = ""
r = requests.get("https://graph.facebook.com/oauth/access_token?"+
"grant_type=fb_exchange_token"+
"&client_id="+APP_ID+
"&client_secret="+APP_SECRET+
"&fb_exchange_token="+short_lived_user_token)
pprint(r.text)
long_lived_access_token = r.text.split('=')[1]
# Expires in about 2 months
# Long lived access token issued 2014-04-08 03:10:
#
# CAA...
#
# "Expires 1402188344 (in about 2 months)" (--> 2014-06-08 02:45)
#
# print(datetime.fromtimestamp(int("5183999")).strftime('%Y-%m-%d %H:%M:%S'))
# A never expiring one preferably. See 'get_eternal_page_access_token.py'.
# PAGE_ACCESS_TOKEN = "CAAInCj..."
DATABASE_FILE = "fb_data.db" # SQLite
# In seconds
UPDATE_PERIOD = 15*60
# UPDATE_PERIOD = 5
RETRY_PERIOD = 60
# RETRY_PERIOD = 1
# On insights data update intervals
#
# This page: https://developers.facebook.com/docs/graph-api/reference/insights says:
# "Metrics that are marked with * are available in real-time, i.e., they are refreshed approximately every 15 minutes.
# Most other metrics will update once every 24 hours."
# Page post impressions and engagement are marked *.
# Page impressions and engagement are not marked.
TRACKED_EVENTS = (
{"name": "woe", "id": "653690918014139"},
{"name": "don", "id": "309063299244493"},
{"name": "ie", "id": "635511229861503"}
)
# Note: not the same strings as in the returned field 'rsvp_status' (those have 'unsure' instead of 'maybe' for example). We don't use those.
EVENTS_TRACKED_RSVP_STATUSES = (
"attending",
"maybe",
"declined",
"noreply"
)
TRACKED_PAGE_POSTS = (
{"name": "Bellingen 2014", "id": "16875510075_10153945920955076"},
{"name": "Share Woe", "id": "16875510075_10153973792450076"},
{"name": "Share Don", "id": "16875510075_10153973793850076"},
{"name": "Promo-post", "id": "16875510075_10154026155645076"}
)
# Keys will be used as SQLite column headings.
PAGE_POST_TRACKED_FIELDS = (
{"name": "impressions", "metric-name": "post_impressions"},
{"name": "impressions_unique", "metric-name": "post_impressions_unique"},
{"name": "impressions_organic", "metric-name": "post_impressions_organic"},
{"name": "impressions_organic_unique", "metric-name": "post_impressions_organic_unique"},
{"name": "impressions_viral", "metric-name": "post_impressions_viral"},
{"name": "impressions_viral_unique", "metric-name": "post_impressions_viral_unique"},
{"name": "impressions_fan", "metric-name": "post_impressions_fan"},
{"name": "impressions_fan_unique", "metric-name": "post_impressions_fan_unique"},
{"name": "consumptions", "metric-name": "post_consumptions"},
{"name": "photo_views", "metric-name": "post_consumptions_by_type", "type-name": "photo view"},
{"name": "other_clicks", "metric-name": "post_consumptions_by_type", "type-name": "other clicks"},
{"name": "consumptions_unique", "metric-name": "post_consumptions_unique"},
{"name": "photo_views_unique", "metric-name": "post_consumptions_by_type_unique", "type-name": "photo view"},
{"name": "other_clicks_unique", "metric-name": "post_consumptions_by_type_unique", "type-name": "other clicks"},
# more consumption types?
{"name": "engaged_users", "metric-name": "post_engaged_users"},
{"name": "stories", "metric-name": "post_stories"},
{"name": "stories_by_like", "metric-name": "post_stories_by_action_type", "type-name": "like"},
{"name": "stories_by_share", "metric-name": "post_stories_by_action_type", "type-name": "share"},
{"name": "storytellers", "metric-name": "post_storytellers"},
{"name": "storytellers_by_liking", "metric-name": "post_storytellers_by_action_type", "type-name": "like"},
{"name": "storytellers_by_sharing", "metric-name": "post_storytellers_by_action_type", "type-name": "share"},
# more story/storyteller types?
)
# nog mogelijk te tracken:
# /{photo-id}: likes,comments,shares op event pics
#
-- Export to csv via SQLite Database Browser:
create table promo_post_data as select * from pagepostdata where post_name='Promo-post'
-- Export to csv via SQLite CLI:
sqlite> .header on
sqlite> .output c:/work/dataout.csv
sqlite> .mode csv
sqlite> SELECT * FROM pagepostdata WHERE post_name='Promo-post'
sqlite> .output stdout
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment