# Gist by @CerebralMastication, created October 13, 2016 14:07
# Gist id: CerebralMastication/f5408672ac627511399d7a643eb297ef
#
# Monthly counts of Stack Overflow questions tagged 'r', with a linear trend
# fit and a look at the average deviation from trend by calendar month.
#
# query run here: https://data.stackexchange.com/stackoverflow/query/edit/543286
#
# select
# TagName,
# str(DATEPART(yyyy, p.CreationDate )) + '-' + str(DATEPART(mm, p.CreationDate )) as postmonth,
# count(pt.PostId) as Num
# from
# Tags,
# PostTags pt,
# Posts as p
# where Tags.Id = pt.TagId
# and p.Id = pt.PostId
# AND tags.tagname = 'r'
# group by
# TagName ,
# str(DATEPART(yyyy, p.CreationDate )) + '-' + str(DATEPART(mm, p.CreationDate ));
## data downloaded and saved to '~/Downloads/QueryResults-r.csv'
library(stringr)
library(ggplot2)
library(plyr)
# Load the monthly question counts exported from the Stack Exchange Data
# Explorer query shown above (columns used below: postmonth, Num).
soData <- read.csv('~/Downloads/QueryResults-r.csv')

# postmonth arrives with padding around the year and month (presumably from
# the SQL str() calls -- confirm against the raw CSV). Trim the ends and
# collapse inner whitespace so labels look like "2016- 9" / "2016- 10".
soData$postmonth <- gsub("\\s+", " ", str_trim(soData$postmonth))

# Year is the first four characters; month is whatever follows the dash.
soData$yearNum <- as.numeric(substr(soData$postmonth, 1, 4))
soData$monthNum <- as.numeric(str_trim(sub("^.*-", "", soData$postmonth)))

# The query has no ORDER BY, so the row order of the CSV is not guaranteed.
# Sort chronologically so that factor levels (plot x-axis order) and any
# later row-position index follow time rather than export order.
soData <- soData[order(soData$yearNum, soData$monthNum), ]
rownames(soData) <- NULL

# Keep postmonth as a factor whose levels are in chronological order;
# unique() guards against a repeated label, which factor() would reject.
soData$postmonth <- factor(soData$postmonth, levels = unique(soData$postmonth))
# Use a larger base font for every plot drawn from here on.
theme_set(theme_gray(base_size = 18))

# Monthly question counts for the early period only (years up to 2009).
ggplot(subset(soData, yearNum <= 2009), aes(x = postmonth, y = Num)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Monthly question counts over the full history.
ggplot(soData, aes(x = postmonth, y = Num)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Save the cleaned data (with the derived year/month columns) next to the
# original download.
write.csv(soData, file = '~/Downloads/QueryResults-r-soData.csv')
## linear trend
# Fit on 2012 onward and leave out '2016- 10' (presumably an incomplete
# month when the data was pulled -- confirm).
soDataSubset <- subset(soData, yearNum >= 2012 & postmonth != '2016- 10')

# The regression uses row position as the time variable, so the rows must be
# in chronological order; sort explicitly rather than trusting export order.
soDataSubset <- soDataSubset[order(soDataSubset$yearNum,
                                   soDataSubset$monthNum), ]
soDataSubset$index <- seq_len(nrow(soDataSubset))

# Straight-line trend of monthly question count against time index.
reg <- lm(Num ~ index, data = soDataSubset)
soDataSubset$modeled <- predict(reg)

# calculate percent deviation of modeled
# (actual minus fitted, expressed as a fraction of the fitted value)
soDataSubset$dev <- (soDataSubset$Num - soDataSubset$modeled) /
  soDataSubset$modeled

## plot errors
ggplot(data = soDataSubset, aes(x = postmonth, y = dev)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Mean relative deviation from the trend for each calendar month, pooled
# across all years in soDataSubset.
monthlyAvgDev <- ddply(
  soDataSubset,
  "monthNum",
  plyr::summarise,
  avgErr = mean(dev)
)

# Treat the month as a category so each month gets its own bar and label.
monthlyAvgDev$monthNum <- factor(monthlyAvgDev$monthNum)

## plot average error by month
ggplot(monthlyAvgDev, aes(x = monthNum, y = avgErr)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# (GitHub page footer, not part of the script:) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment