Skip to content

Instantly share code, notes, and snippets.

@bluechoochoo
Forked from mediaczar/sitemap_share_counter.py
Last active August 29, 2015 13:56
Show Gist options
  • Save bluechoochoo/9123709 to your computer and use it in GitHub Desktop.
Save bluechoochoo/9123709 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
######### count up twitter, FB, Pinterest shares + comments on your/project's site, put them in beautiful spreadsheet
######### screenshot: https://twitter.com/bluechoochoo/status/431853397612834816/photo/1
######### you just have to change ONE line of code, baby.
######### I'm not the author — just adding newbie instructions. Forked from https://gist.github.com/mediaczar/7808764
######### ******************* how-to: *******************
######### 1. replace the value of the sitemapURL variable below with YOUR sitemap (probably http://yourdomain.com/sitemap.xml)
######### 2. paste this entire code-snippet in your scraperwiki.com session (you've already signed-up, right?)
######### 3. let scraperwiki work its magic
######### 4. profit
######### 4a. tell me on twitter: @bluechoochoo
###################
import scraperwiki
import requests
import xmltodict
import json
import time
graph_query_root = "https://graph.facebook.com/fql"  # Facebook Graph API FQL endpoint
graph_attr = ['share_count', 'like_count', 'comment_count']  # link_stat columns fetched per URL
###################
def query_graph_api(url):
    """Query the Facebook Graph API (FQL) for share metrics of *url*.

    Returns a dict with one entry per name in ``graph_attr``
    (share_count, like_count, comment_count). Sleeps 2 seconds after
    each call as crude rate limiting.
    """
    # Build the FQL statement and let requests URL-encode it via the
    # `params` argument — the original hand-concatenated query string
    # broke on URLs containing quotes or other special characters.
    fql = 'SELECT %s FROM link_stat WHERE url = "%s"' % (
        ','.join(graph_attr), url)
    print(fql)  # debug console
    query_data = requests.get(graph_query_root, params={'q': fql})
    query_json = json.loads(query_data.text)
    result = {}
    for item in graph_attr:
        # A KeyError/IndexError here means the API returned no row for
        # this URL — let it propagate rather than record bogus zeros.
        result[item] = query_json['data'][0][item]
    time.sleep(2)  # be polite: throttle successive Graph API calls
    return result
def query_twitter(url):
    """Return the tweet count for *url* from Twitter's URL-count API."""
    endpoint = "http://urls.api.twitter.com/1/urls/count.json"
    # Pass the target URL via `params` so it gets properly URL-encoded;
    # the original raw %-interpolation broke on URLs with &, #, etc.
    query_data = requests.get(endpoint, params={'url': url})
    print(query_data.url)  # debug console
    query_json = json.loads(query_data.text)
    return query_json['count']
def query_pinterest(url):
    """Return the pin count for *url* from Pinterest's widget count API.

    The endpoint answers with JSONP (``callbackName({...})``); the JSON
    payload is extracted between the outermost parentheses instead of
    the original hard-coded slice ``[13:-1]``, which silently produced
    garbage whenever the callback name length changed.
    """
    endpoint = "http://widgets.pinterest.com/v1/urls/count.json"
    # `params` URL-encodes the target URL for us.
    query_data = requests.get(endpoint, params={'url': url})
    print(query_data.url)  # debug console
    body = query_data.text
    payload = body[body.index('(') + 1:body.rindex(')')]
    query_json = json.loads(payload)
    return query_json['count']
################### CREATE DICTIONARY FROM SITEMAP
# Fetch the sitemap, then collect share metrics for every URL in it.
# NOTE(review): indentation below was lost in the original paste and has
# been restored — the loop body must be indented for this to run at all.
sitemapURL = 'http://www.recipegirl.com/sitemap.xml'  # <-- change to YOUR sitemap
sitemap_raw = requests.get(sitemapURL)
sitemap_dict = xmltodict.parse(sitemap_raw.text)

# One record per sitemap <url> entry, keyed by its <loc> URL.
for page in sitemap_dict['urlset']['url']:
    pages = {}
    pages['url'] = page['loc']
    # Get Facebook Graph data (share/like/comment counts)
    graph_data = query_graph_api(pages['url'])
    for item in graph_attr:
        pages[item] = graph_data[item]
    # Get Twitter data
    pages['tweets'] = query_twitter(pages['url'])
    # Get Pinterest data
    pages['pins'] = query_pinterest(pages['url'])
    # TODO: LinkedIn / Delicious / StumbleUpon / Reddit counters
    # Commit one row per URL; unique_keys gives upsert semantics.
    scraperwiki.sqlite.save(unique_keys=['url'], data=pages)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment