Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Make a word cloud from an ORCID iD
## install packages using the following
## install.packages("devtools")
## library(devtools)
## install_github("ropensci/rorcid")
## install.packages(c("tm","wordcloud"))
## load libraries
library(rorcid)
library(tm)
library(wordcloud)
## functions
## Extract the publication years from a record.
##   x - list-like object whose "works" element holds a
##       "publication-date.year.value" field (presumably one entry of an
##       rorcid result; confirm against the caller).
## Returns the years coerced to numeric.
get.years <- function(x) {
  works <- x[["works"]]
  as.numeric(works[["publication-date.year.value"]])
}
## Extract the work titles from a record.
##   x - list-like object whose "works" element holds a
##       "work-title.title.value" field (presumably one entry of an rorcid
##       result; confirm against the caller).
## Returns the titles exactly as stored.
get.titles <- function(x) {
  works <- x[["works"]]
  works[["work-title.title.value"]]
}
## Extract contributor credit names, one list element per work.
##   x        - list-like object whose "works" element holds a
##              "work-contributors.contributor" list; each entry is expected
##              to carry a "credit-name.value" field.
##   surnames - if TRUE, reduce each name to a surname: drop everything from
##              the first comma onwards, then everything up to the last space.
##              If FALSE, replace the first space in each name with a comma.
## Returns a list of character vectors.
get.authors <- function(x, surnames = FALSE) {
  contributors <- x[["works"]][["work-contributors.contributor"]]
  credited <- lapply(contributors, function(entry) entry[["credit-name.value"]])
  tidy <- if (surnames) {
    function(nm) sub(".* ", "", sub(",.*", "", nm))
  } else {
    function(nm) sub(" ", ",", nm)
  }
  lapply(credited, tidy)
}
## adapted this function from http://entrenchant.blogspot.co.uk/2013/06/english-word-clouds-in-r.html
##
## Merge the rows of simple English plurals into the row of their singular.
##   m - numeric matrix with one row per term (terms in rownames(m)) and one
##       column per document.
## For every term, any LATER row named <term>s or <term>es is added to that
## term's row and then dropped. Only rows after the singular are searched, so
## the rows are assumed to be ordered with the singular first (as in an
## alphabetically sorted term list).
## Each plural row is merged at most once: if two terms could both claim it
## (e.g. "bas" and "base" for "bases"), the earlier term wins, so counts are
## never duplicated.
## Returns the reduced matrix (same columns, possibly fewer rows).
aggregate.plurals <- function (m) {
  words <- rownames(m)
  n <- length(words)
  idx <- seq_len(n)
  groups <- vector("list", n)
  ## absorbed[j] is TRUE once row j has been merged into an earlier row
  absorbed <- logical(n)
  for (i in idx) {
    if (absorbed[i]) {
      next
    }
    plurals <- paste0(words[[i]], c("s", "es"))
    ## only later rows that have not already been claimed by another term
    hits <- which(words %in% plurals & idx > i & !absorbed)
    absorbed[hits] <- TRUE
    groups[[i]] <- c(i, hits)
  }
  ## vapply keeps the result an integer vector even when there are no rows
  group.size <- vapply(groups, length, integer(1))
  for (i in which(group.size > 1L)) {
    m[i, ] <- colSums(m[groups[[i]], , drop = FALSE])
  }
  m[group.size > 0L, , drop = FALSE]
}
## Extra stop words, removed in addition to the standard English stop words.
mystopwords <- c(
  "using", "near", "one", "two", "three", "ten", "without"
)
## Draw a word cloud from a collection of text.
##   text     - text to analyse, one element per document.
##   by       - optional grouping vector parallel to `text`. When supplied,
##              the text is pasted together within each group and a
##              comparison cloud (one section per group) is drawn instead of
##              a single word cloud.
##   group    - optional bin width. When given together with `by`, `by` is
##              coerced to numeric and rounded down to a multiple of `group`
##              before grouping.
##   min.freq - minimum total frequency (summed over all documents) a term
##              needs in order to be plotted.
## The text is lower-cased, stripped of punctuation, numbers, English stop
## words and `mystopwords`, and simple plurals are merged into their singular.
## Returns the value of wordcloud() or comparison.cloud(); if no term reaches
## `min.freq`, warns and returns NULL invisibly without plotting.
make.cloud <- function(text, by = NULL, group = NULL, min.freq = 3) {
  if (!is.null(group) && !is.null(by)) {
    by <- group * (as.numeric(by) %/% group)
  }
  if (!is.null(by)) {
    text <- tapply(text, by, paste, collapse = " ")
  }

  corp <- VCorpus(VectorSource(text))
  corp <- tm_map(corp, content_transformer(tolower))
  corp <- tm_map(corp, removePunctuation)
  corp <- tm_map(corp, removeNumbers)
  corp <- tm_map(corp, removeWords, c(stopwords("english"), mystopwords))

  m <- as.matrix(TermDocumentMatrix(corp))
  ## label the columns with the group names (NULL when `by` was not given)
  colnames(m) <- names(text)
  m <- aggregate.plurals(m)
  m <- m[order(rowSums(m), decreasing = TRUE), , drop = FALSE]
  ## per-document normalisation, left disabled as in the original:
  ## for(i in 1:ncol(m))
  ## m[,i] <- m[,i]/sum(m[,i])
  m <- m[rowSums(m) >= min.freq, , drop = FALSE]

  ## the plotting functions cannot cope with an empty term list
  if (nrow(m) == 0L) {
    warning("no terms with frequency >= ", min.freq, "; nothing to plot",
            call. = FALSE)
    return(invisible(NULL))
  }

  if (is.null(by)) {
    freq <- rowSums(m)
    ## forward min.freq: wordcloud() otherwise applies its own default of 3
    ## and silently drops terms the caller asked to keep
    return(wordcloud(names(freq), unname(freq), min.freq = min.freq,
                     colors = brewer.pal(6, "Dark2"), random.order = FALSE))
  }
  comparison.cloud(m)
}
## Plot a summary of the first element of `data`.
##   data - list whose first element is a record understood by get.titles(),
##          get.authors() and get.years() (presumably an rorcid result;
##          confirm against the caller).
##   what - "titles"  : word cloud of the work titles,
##          "authors" : word cloud of contributor surnames,
##          "years"   : histogram of publication years.
## Any other value of `what` only emits a message saying it is not recognised.
orcid.cloud <- function(data, what = "titles") {
  ## the branches are closures so that `data` is only touched by the branch
  ## that is actually selected
  cloud.titles <- function() make.cloud(get.titles(data[[1]]))
  cloud.authors <- function() make.cloud(get.authors(data[[1]], surnames = TRUE))
  hist.years <- function() hist(get.years(data[[1]]))
  unknown <- function() message(what, " not recognised")

  switch(what,
    titles = cloud.titles(),
    authors = cloud.authors(),
    years = hist.years(),
    unknown()
  )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment