Skip to content

Instantly share code, notes, and snippets.

@tts
Last active October 13, 2015 05:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tts/4147093 to your computer and use it in GitHub Desktop.
Save tts/4147093 to your computer and use it in GitHub Desktop.
Word cloud from Aalto People jobtitles
###############################################################
#
# Word cloud from Aalto People jobtitles
#
# Data from Linked Open Aalto Data Service
# http://data.aalto.fi/
#
# R code below is only slightly adapted from the second example at
# http://onertipaday.blogspot.fi/2011/07/word-cloud-in-r.html
#
# Tuija Sonkkila 25.11.2012, 18.5.2013
#
#
# Note that only a fraction of Aalto University staff
# has an open Aalto People profile, and of those who do,
# not everyone has listed a job title.
#
#
###############################################################
library(tm)
library(wordcloud)
library(RColorBrewer)
library(SPARQL)
# ---- Fetch English job titles from the Aalto SPARQL endpoint ----
endpoint <- "http://data.aalto.fi/sparql"
q <- "SELECT ?onlyTitle
WHERE {
GRAPH <http://data.aalto.fi/id/people/>
{
?person <http://schema.org/jobtitle> ?title
}
FILTER (langMatches(lang(?title), 'en' ))
BIND (str(?title) AS ?onlyTitle)
}
ORDER BY ?onlyTitle"
res <- SPARQL(url = endpoint, query = q)$results
dim(res)

# Fail early with a clear message instead of an obscure error further down
# when the endpoint returns nothing.
if (is.null(res) || nrow(res) == 0) {
  stop("SPARQL query returned no job titles from ", endpoint, call. = FALSE)
}

# ---- Build and clean the corpus: one document per job title ----
# EDIT 18.5.2013 the dimension of res has changed: titles now arrive as a
# single column (formerly res[1:404]).
# VectorSource is used rather than DataframeSource because newer tm releases
# require DataframeSource input to have `doc_id` and `text` columns.
res.corpus <- Corpus(VectorSource(as.character(res[[1]])))
res.corpus <- tm_map(res.corpus, removePunctuation)
# tolower is a plain base function, not a tm transformation; it has to be
# wrapped in content_transformer() so the documents keep their tm class.
res.corpus <- tm_map(res.corpus, content_transformer(tolower))
res.corpus <- tm_map(res.corpus, removeWords, stopwords("english"))

# ---- Term frequencies over all titles ----
tdm <- TermDocumentMatrix(res.corpus)
m <- as.matrix(tdm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v, stringsAsFactors = FALSE)
table(d$freq)

# ---- Render the word cloud to a PNG file ----
pal2 <- brewer.pal(8, "Dark2")
png("wordcloudtitles.png", width = 1280, height = 800)
# Close the graphics device even if wordcloud() fails, so that no device is
# left open and the output file is always finalised.
invisible(tryCatch(
  wordcloud(
    d$word, d$freq,
    scale = c(8, .2), min.freq = 3, max.words = Inf,
    random.order = FALSE, rot.per = .15, colors = pal2
  ),
  finally = dev.off()
))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment