Skip to content

Instantly share code, notes, and snippets.

@dansku
Last active December 18, 2015 05:39
Show Gist options
  • Save dansku/5734028 to your computer and use it in GitHub Desktop.
Save dansku/5734028 to your computer and use it in GitHub Desktop.
StatCounter doesn't have an API or any other way to retrieve stats information, so this is a scraper that gets the info from the page... It still needs a lot of development!
import cookielib
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
import re
import time
#----[CONFIGURE]---------------------------------------------------------------------#
#
# Change HERE!
#
# Each setting can also be supplied through an environment variable, so real
# credentials never have to be saved in this file. The literal on the right is
# only the fallback used when the variable is not set.
import os

# StatCounter account user name, sent as the 'form_user' login field.
login = os.environ.get('STATCOUNTER_LOGIN', 'yourLogin')
# StatCounter account password, sent as the 'form_pass' login field.
password = os.environ.get('STATCOUNTER_PASSWORD', 'yourPassword')
# Path segment identifying the project: statcounter.com/<project>/summary/
project = os.environ.get('STATCOUNTER_PROJECT', 'yourProjectPage')
#----[LOGIN]------------------------------------------------------------------------#
# Seconds to wait for StatCounter before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 30
# Store the cookies and create an opener that will hold them
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
# Headers
opener.addheaders = [('User-agent', 'StatCounter Scraper')]
# Install the opener globally so that urllib2.urlopen() goes through the cookie jar
urllib2.install_opener(opener)
# URL of the project summary page. HTTPS is used because the login and the
# password travel in the body of this request.
url = 'https://statcounter.com/' + project + '/summary/'
# Input parameters we are going to send
payload = {
    'op': 'login-main',
    'form_user': login,
    'form_pass': password,
}
# Use urllib to encode the payload
data = urllib.urlencode(payload)
# Build our Request object (supplying 'data' makes it a POST)
req = urllib2.Request(url, data)
# Make the request; the response body is read by the scraping step below.
# urllib2.URLError / HTTPError are left to propagate so a failed request is
# reported instead of being scraped as if it were the summary page.
resp = urllib2.urlopen(req, timeout=REQUEST_TIMEOUT)
#----[Scraping]------------------------------------------------------------------------#
# Positions of the values we want among all <td> cells of the summary page.
# NOTE(review): these offsets depend on the current page layout - re-check them
# if StatCounter changes its markup.
PAGEVIEWS_7DAYS_CELL = 4
UNIQUE_VISITS_7DAYS_CELL = 5
PAGEVIEWS_TODAY_CELL = 16
UNIQUE_VISITS_TODAY_CELL = 17


def _cell_text(cell):
    """Return the text of a table cell, even when it contains nested tags."""
    # .string is None when the cell is not a single plain text node, and None
    # cannot be concatenated to the report lines printed below.
    if cell.string is not None:
        return cell.string
    return ''.join(cell.findAll(text=True))


# Parse the page returned by the login request and collect every table cell
doc = BeautifulSoup(resp.read())
cells = doc.findAll("td")

# Fail with an explanation instead of a bare IndexError when the expected cells
# are missing (presumably a failed login or a wrong project - verify manually).
if len(cells) <= UNIQUE_VISITS_TODAY_CELL:
    raise SystemExit(
        'Expected at least %d <td> cells in the summary page but found %d; '
        'check the login, password and project settings.'
        % (UNIQUE_VISITS_TODAY_CELL + 1, len(cells))
    )

# Get data for the last 7 days
pageviews_7days = _cell_text(cells[PAGEVIEWS_7DAYS_CELL])
unique_visits_7days = _cell_text(cells[UNIQUE_VISITS_7DAYS_CELL])
# Get data for today
pageviews_today = _cell_text(cells[PAGEVIEWS_TODAY_CELL])
unique_visits_today = _cell_text(cells[UNIQUE_VISITS_TODAY_CELL])

# Single-argument, parenthesised print works as a Python 2 print statement and
# produces exactly the same output.
print('----[Status]---------------------------')
print('-----------[Last 7 Day]----------------')
print('| PageViews - last 7 days: ' + pageviews_7days)
print('| Unique Visits - last 7 days: ' + unique_visits_7days)
print('------------[Today]--------------------')
print('| PageViews today: ' + pageviews_today)
print('| Unique Visits today: ' + unique_visits_today)
print('---------------------------------------')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment