# Fetch the R-Bloggers feed through Google Reader, tabulate its posts, and draw
# (1) a calendar heat map of posts per day and (2) a word cloud of the post
# categories.
#
# Objects left in the calling environment: rbloggers, entries, posts,
# posts.categories, cal and ctab.

# Fail early (before any download) if the word cloud package is missing.
library(wordcloud)

# NOTE(review): this remote script is presumed to define getRSSFeed() and to
# attach the XML package (xmlChildren, xmlAttrs, saveXML) -- confirm.
source("https://raw.github.com/gist/1606595/269d61dfcc7930f5275a212e11f3c43771ab2591/GoogleReader.R")

# Replace the email/passwd placeholders with real credentials before running.
rbloggers <- getRSSFeed(
  feedURL = "http://r-bloggers.com/feed",
  email = "GOOGLE READER EMAIL",
  passwd = "GOOGLE READER PASSWORD",
  posts = 5000
)
entries <- rbloggers[which(names(rbloggers) == "entry")]
length(entries)
saveXML(rbloggers, file = "rbloggers.xml")

# The raw feed has been saved above; without entries there is nothing left to
# tabulate or plot, so stop with a clear message instead of a subscript error.
if (length(entries) == 0) {
  stop("The feed returned no <entry> elements.", call. = FALSE)
}

# Text content of an XML node, or NA when the node (or its text child) is
# absent, so one incomplete entry does not abort the whole run.
entry_text <- function(node) {
  if (is.null(node)) {
    return(NA_character_)
  }
  value <- unclass(xmlChildren(node)$text)$value
  if (is.null(value)) NA_character_ else value
}

# This will create a data frame with some of the information from the RSS feed
entry_ids <- seq_along(entries)
posts <- data.frame(
  title = vapply(
    entry_ids,
    function(i) entry_text(entries[[i]][["title"]]),
    character(1)
  ),
  author = vapply(
    entry_ids,
    function(i) entry_text(entries[[i]][["author"]][["name"]]),
    character(1)
  ),
  link = vapply(
    entry_ids,
    function(i) xmlAttrs(entries[[i]][["link"]])[["href"]],
    character(1)
  ),
  stringsAsFactors = FALSE
)

# Only the first ten characters of the published value are parsed as the date.
published_text <- vapply(
  entry_ids,
  function(i) entry_text(entries[[i]][["published"]]),
  character(1)
)
posts$published <- as.Date(substr(published_text, 1, 10))

# One list of category terms per entry (an empty list when an entry has at
# most one category).
posts.categories <- lapply(entry_ids, function(i) {
  entry <- entries[[i]]
  categories <- entry[which(names(entry) == "category")]
  # Ignore the first category as it is used for Google Reader
  lapply(seq_along(categories)[-1], function(j) {
    xmlAttrs(categories[[j]])[["term"]]
  })
})

# We'll use Paul Bleicher's calendarHeat function to visualize the number of
# posts per day
source("https://raw.github.com/tavisrudd/r_users_group_1/master/calendarHeat.R")
cal <- as.data.frame(table(posts$published))
cal$Var1 <- as.Date(cal$Var1)
calendarHeat(cal$Var1, cal$Freq, color = "r2b",
             varname = "Number of Posts on R-Bloggers.com")

# Create a word cloud
ctab <- unlist(posts.categories)
# as.character() keeps strsplit() from failing when no entry had categories.
ctab <- unlist(strsplit(as.character(ctab), " "))
ctab <- as.data.frame(table(ctab))
# Use a logical mask: ctab[-which(...), ] would drop every row whenever
# "Uncategorized" does not occur, because -integer(0) selects nothing.
ctab <- ctab[ctab$ctab != "Uncategorized", ]
wordcloud(ctab$ctab, ctab$Freq, min.freq = 10)