Skip to content

Instantly share code, notes, and snippets.

@stephlocke stephlocke/ExeterCode.R
Last active Aug 29, 2015

Embed
What would you like to do?
SQLSaturday Exeter Submissions
# This script should work for any SQL Saturday event whose schedule has not yet been confirmed (not tested on other events!); just change the URL.
library(rvest)
library(data.table)
library(ggplot2)
# packages required only for the word cloud
library(wordcloud)
library(tm)
library(foreach)
# Get data ----
# Download the schedule page and parse every HTML <table> on it into a
# data frame. read_html() replaces the deprecated rvest::html().
url <- "https://www.sqlsaturday.com/372/schedule.aspx"
schedule_tables <- url %>%
  read_html() %>%
  html_nodes("table") %>%
  html_table()

# Prep it ----
# The submissions are read from the second table on the page; fail with a
# clear message instead of a subscript error if the page has fewer tables.
if (length(schedule_tables) < 2L) {
  stop("Expected at least two tables at ", url,
       " but found ", length(schedule_tables), call. = FALSE)
}
sessions <- data.table(schedule_tables[[2]])
# Fix the level order for plotting; any Level value outside these three
# becomes NA.
sessions[, Level := factor(
  Level,
  levels = c("Beginner", "Intermediate", "Advanced")
)]
# Start charting for kicks ----
# Bar chart of the number of submitted sessions per level.
# geom_bar() counts rows for each discrete x value, which is what a factor
# needs; geom_histogram() bins a continuous variable and rejects a factor in
# current ggplot2. The former scale_fill_brewer() call was dropped because
# the fill is a fixed colour, not a mapped aesthetic, so the scale did nothing.
ggplot(sessions, aes(x = Level)) +
  geom_bar(fill = "light blue") +
  theme_minimal() +
  labs(title = "Submitted sessions")
# Top submitters ----
# Count sessions per speaker, sort by count (descending) then name, and show
# up to five rows. head() is used instead of [1:5] so that fewer than five
# speakers does not produce padding rows of NA.
head(sessions[, .N, by = Speaker][order(-N, Speaker)], 5)

# Average number of submissions per speaker ----
sessions[, .N, by = Speaker][, mean(N)]
# Word cloud ----
# Draw a word cloud of the most frequent words in a set of texts.
#
# Args:
#   v: vector of texts to analyse. Defaults to the Title column of the
#      global `sessions` table (the default is evaluated lazily, on first
#      use inside the function).
#
# Returns:
#   NULL, invisibly. The function is called for its side effect of drawing
#   on the active graphics device.
wordcloudgen <- function(v = sessions[, Title]) {
  corp <- Corpus(VectorSource(v))

  # Turn "/" and "@" into spaces so words joined by them become separate terms.
  # Base functions are wrapped in content_transformer() so tm_map() edits the
  # document content and keeps the corpus structure intact; this removes the
  # need to re-wrap documents with PlainTextDocument afterwards.
  to_space <- content_transformer(function(x) gsub("[/@]", " ", x))
  corp <- tm_map(corp, to_space)
  corp <- tm_map(corp, content_transformer(tolower))
  corp <- tm_map(corp, removeWords, stopwords("english"))
  corp <- tm_map(corp, removeNumbers)
  corp <- tm_map(corp, removePunctuation)
  corp <- tm_map(corp, stripWhitespace)

  # Total occurrences of each term across all documents, most frequent first.
  dtm <- DocumentTermMatrix(corp)
  term_freq <- sort(colSums(as.matrix(dtm)), decreasing = TRUE)

  if (length(term_freq) == 0L) {
    warning("No terms left after cleaning; nothing to plot.", call. = FALSE)
    return(invisible(NULL))
  }

  pal <- brewer.pal(6, "RdYlBu")
  # Same settings as the original positional call, now spelled out by name.
  wordcloud(
    words = names(term_freq),
    freq = term_freq,
    scale = c(8, 3),
    min.freq = 2,
    random.order = FALSE,
    rot.per = 0.15,
    colors = pal
  )
  invisible(NULL)
}

wordcloudgen()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.