Last active
December 10, 2015 23:38
-
-
Save mhawksey/4510468 to your computer and use it in GitHub Desktop.
Using R and the SparkTable package to get Wordpress postview sparklines. See http://mashe.hawksey.info/2013/01/wordpress-stats-in-r-postviews-sparktable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# <- Start of bit by Tony Hirst (@psychemedia) see http://blog.ouseful.info/2013/01/09/wordpress-stats-in-r/ | |
#Wordpress Stats | |
##--------------- | |
#Wordpress Stats API docs (from http://stats.wordpress.com/csv.php) | |
#You can get a copy of your API key (required) from Akismet: | |
#Login with your Wordpress account: http://akismet.com/account/
#Resend API key: https://akismet.com/resend/ | |
#Required parameters: api_key, blog_id or blog_uri. | |
#Optional parameters: table, post_id, end, days, limit, summarize. | |
#Parameters: | |
#api_key String A secret unique to your WordPress.com user account. | |
#blog_id Integer The number that identifies your blog. Find it in other stats URLs. | |
#blog_uri String The full URL to the root directory of your blog. Including the full path. | |
#table String One of views, postviews, referrers, referrers_grouped, searchterms, clicks, videoplays. | |
#post_id Integer For use with postviews table. | |
#end String The last day of the desired time frame. Format is 'Y-m-d' (e.g. 2007-05-01) and default is UTC date. | |
#days Integer The length of the desired time frame. Default is 30. "-1" means unlimited. | |
#period String For use with views table and the 'days' parameter. The desired time period grouping. 'week' or 'month' | |
#Use 'days' as the number of results to return (e.g. '&period=week&days=12' to return 12 weeks) | |
#limit Integer The maximum number of records to return. Default is 100. "-1" means unlimited. If days is -1, limit is capped at 500. | |
#summarize Flag If present, summarizes all matching records. | |
#format String The format the data is returned in, 'csv', 'xml' or 'json'. Default is 'csv'. | |
##--------------------------------------------- | |
#NOTE: some of the report calls I tried didn't seem to work properly? | |
#Need to build up a list of tested calls to the API that actually do what you think they should? | |
##----- | |
# Fetch one table from the Wordpress stats CSV endpoint and return it as a
# data frame.
#
# Arguments:
#   apikey    API key for the Wordpress account.
#   blogurl   Root URL of the blog (sent as blog_uri).
#   table     Stats table to request, e.g. 'postviews', 'views', 'clicks'.
#   end       Last day of the time frame; a Date or a 'Y-m-d' string.
#   days      Length of the time frame.
#   period    Grouping period ('week', 'month', or '' for none).
#   limit     Maximum number of records ('' leaves the API default).
#   summarise Raw query fragment appended verbatim after the final '&'.
#             The Wordpress stats API lists the flag as 'summarize', so pass
#             summarise='summarize' to switch it on.
#
# The defaults request the last 12 weeks of postviews aggregated by week.
# Returns whatever read.csv() parses from the response.
wordpress.getstats.demo <- function(apikey, blogurl, table='postviews', end=Sys.Date(), days='12', period='week', limit='', summarise=''){
  # Percent-encode every value so that reserved characters (the ':' and '/'
  # in blogurl, or any '&', '=', '?' in other values) cannot corrupt the
  # query string. as.character() keeps Dates and numbers formatted the same
  # way paste() would format them.
  encode <- function(value) utils::URLencode(as.character(value), reserved = TRUE)
  url <- paste0('http://stats.wordpress.com/csv.php?',
    'api_key=', encode(apikey),
    '&blog_uri=', encode(blogurl),
    '&table=', encode(table),
    '&end=', encode(end),
    '&days=', encode(days),
    '&period=', encode(period),
    '&limit=', encode(limit),
    '&', summarise  # caller-supplied fragment, deliberately not encoded
  )
  #Martin's post notes that JSON appears to work better than CSV
  #May be worth doing a JSON parsing version?
  read.csv(url)
}
# Account settings used by every example call below.
APIKEY <- 'YOUR_API_KEY_HERE'
# The blog URL must belong to the same Wordpress account as the API key.
BLOGURL <- 'http://mashe.hawksey.info'

# Example requests. Each one stores the parsed CSV response in a data frame.
# Function defaults: postviews table, 12 days, grouped by week.
wp.pageviews.last12weeks <- wordpress.getstats.demo(APIKEY, BLOGURL)
# Same time frame, but the 'views' table.
wp.views.last12weeks.byweek <- wordpress.getstats.demo(APIKEY, BLOGURL, table='views')
# 30 days of views with no period grouping.
wp.views.last30days.byday <- wordpress.getstats.demo(APIKEY, BLOGURL, table='views', days=30, period='')
# Clicks with days and period left blank so the API applies its own defaults.
wp.clicks.wpdefault <- wordpress.getstats.demo(APIKEY, BLOGURL, table='clicks', days='', period='')
# Clicks and referrers for a single day.
wp.clicks.lastday <- wordpress.getstats.demo(APIKEY, BLOGURL, table='clicks', days='1', period='')
wp.referrers.lastday <- wordpress.getstats.demo(APIKEY, BLOGURL, table='referrers', days='1', period='')
# -> End of bit by Tony Hirst (@psychemedia) | |
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the script fail later at the first cast() call.
library(reshape)
# Get postview stats for last year. See the example calls for other query
# options, e.g. without an end date.
wp.postviews <- wordpress.getstats.demo(APIKEY,BLOGURL,'postviews',days='366',end='2012-12-31', period='day',limit=-1)
# Make a data frame of just title, views and date.
data <- data.frame(post_title=wp.postviews$post_title, views=wp.postviews$views, date=wp.postviews$date)
# Convert the date string into a Date.
data$date <- as.Date(as.character(data$date), format="%Y-%m-%d")
# Cast the data frame to fill missing data points with 0 (seemed to be
# required for sparkTable).
data.casted <- cast(data, post_title ~ date, sum)
# Melt back to long form for sparkTable.
data.melted <- melt(data.casted, id.vars=c("post_title"))
df <- data.melted
# The sparkTable examples appear to use the column headings variable, value
# and time.
columnNames <- c("variable","value","time")
colnames(df) <- columnNames
library(sparkTable)
# sparkTable cobbled together from http://stackoverflow.com/q/8588060/1027723 and
#http://web.warwick.ac.uk/statsdept/user2011/TalkSlides/Contributed/18Aug_0950_FocusVI_4-ReportingData_2-Kowarik.pdf
# One entry per output column: a bar sparkline plus four summary statistics.
content <- list()
#content[['LinePlot']]<-newSparkLine()
content[['BarPlot']] <- newSparkBar()
# Mean views over the days that had at least one view.
content[['Mean']] <- function(x) { round(sum(x)/length(which(x>0)),1) }
content[['Max']] <- function(x) { max(x) }
content[['Views']] <- function(x) { sum(x) }
# Number of days with at least one view.
content[['Days']] <- function(x) { length(which(x>0)) }
# One "value" entry per content column. Derived from content so the two
# cannot drift apart when a column is added or removed.
varType <- rep("value", length(content))
df <- df[,c("variable","value","time")]
# NOTE(review): time is derived from the date column headings at this point;
# confirm that as.numeric() yields usable values here rather than NA.
df$time <- as.numeric(as.character(df$time))
dat <- reshapeExt(df,idvar="variable",varying=list(2))
sparkTab <- newSparkTable(dat,content,varType)
plotSparkTable(sparkTab, outputType = "html", filename = "t1")
# A dead end, kept for reference: add per-post summary columns to the cast
# (wide) data frame and keep the top posts by mean daily views.
pivot <- cast(data, post_title ~ date, sum, fill=NA)
# Column count taken before any summary column is appended, so 2:noDays keeps
# selecting only the per-date columns (column 1 is post_title).
noDays <- ncol(pivot)
pivot$sum <- apply(pivot[2:noDays], 1, sum, na.rm=TRUE)
pivot$max <- apply(pivot[2:noDays], 1, max, na.rm=TRUE)
# The mean was previously rounded without ever being computed, which left
# pivot$mean undefined. Compute it over the days that have data, then round.
pivot$mean <- round(apply(pivot[2:noDays], 1, mean, na.rm=TRUE), 2)
# Number of days with a recorded value for each post.
pivot$count <- apply(pivot[2:noDays], 1, function(x) length(which(!is.na(x))))
# Top 20 posts by mean views. Capped at the number of rows so that fewer than
# 20 posts does not produce all-NA padding rows.
pivot.sort <- pivot[order(-pivot$mean), ]
pivot.sort <- pivot.sort[seq_len(min(20, nrow(pivot.sort))), ]
# <- More of Tony Hirst (@psychemedia) see http://blog.ouseful.info/2013/01/09/wordpress-stats-in-r/ | |
require(stringr) | |
# Extract the host part of each http(s) URL.
#
# url: character vector (or factor) of URLs.
# Returns a character vector the same length as url. Entries that do not
# start with http:// or https:// become NA.
#
# The host ends at the first '/', '?' or '#', so bare URLs such as
# "http://example.com" (no trailing slash) are matched as well.
getDomain <- function(url) {
  url <- as.character(url)
  # regexec() captures the full match plus the host group for every element.
  pieces <- regmatches(url, regexec("^https?://([^/?#]+)", url))
  vapply(
    pieces,
    function(m) if (length(m) >= 2L) m[2L] else NA_character_,
    character(1)
  )
}
# Pull out the domains that clicks were sent to or that referrals came from.
wp.clicks.lastday$domain <- getDomain(wp.clicks.lastday$click)
wp.referrers.lastday$domain <- getDomain(wp.referrers.lastday$referrer)
# library() fails immediately if ggplot2 is missing; require() would only
# return FALSE.
library(ggplot2)
# Scruffy bar charts of how often each domain occurs, ordered by frequency
# via reorder() on the x aesthetic.
# The clicks chart previously read wp.clicks.yesterday, which is never
# defined. The clicks data frame with the domain column is wp.clicks.lastday.
click.domains <- as.data.frame(table(wp.clicks.lastday$domain))
ggplot(click.domains)+geom_bar(aes(x=reorder(Var1,Freq),y=Freq),stat='identity')+theme( axis.text.x=element_text(angle=-90))
referrer.domains <- as.data.frame(table(wp.referrers.lastday$domain))
ggplot(referrer.domains)+geom_bar(aes(x=reorder(Var1,Freq),y=Freq),stat='identity')+theme( axis.text.x=element_text(angle=-90))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment