Instantly share code, notes, and snippets.

Embed
What would you like to do?
Function that scrapes GitHub and combines the traffic of several repositories into one plot
library(RSelenium)
library(XML)
library(ggplot2)
library(reshape2)
library(plyr)
library(dplyr)
# Account settings used by the scraper.
# Each value is taken from an environment variable when that variable is set,
# so the password does not have to be saved inside this script. When the
# variable is unset the placeholder below is used and must be edited by hand.
gh_user <- Sys.getenv("GH_TRAFFIC_USER", unset = "<your github login name>")
gh_pass <- Sys.getenv("GH_TRAFFIC_PASS", unset = "<your github login password>")
gh_team <- Sys.getenv("GH_TRAFFIC_TEAM", unset = "<team associated with account>")
# Character vector of repository names; GH_TRAFFIC_REPOS may list several
# names separated by commas.
repos <- trimws(
  strsplit(
    Sys.getenv("GH_TRAFFIC_REPOS", unset = "<repositories in team>"),
    ",",
    fixed = TRUE
  )[[1]]
)
# Failsafe: if the function fails and you need to close the port manually, run the lines below.
#rD <- rsDriver(verbose = FALSE,port=4444L)
#remDr <- rD$client
#remDr$close()
# Linearly map `x` from its own observed (finite) range onto the range `to`.
# `to` may be given in descending order to flip the direction of the mapping.
.rescale_linear <- function(x, to) {
  if (!any(is.finite(x)) || !all(is.finite(to))) {
    return(rep(NA_real_, length(x)))
  }
  from <- range(x, na.rm = TRUE, finite = TRUE)
  if (from[1] == from[2] || to[1] == to[2]) {
    # Degenerate range: every non-missing value lands on the midpoint of `to`.
    return(ifelse(is.na(x), NA_real_, mean(to)))
  }
  (x - from[1]) / (from[2] - from[1]) * (to[2] - to[1]) + to[1]
}

# Turn "month/day" axis labels into Dates, choosing the year relative to `today`.
.parse_axis_dates <- function(labels, today = Sys.Date()) {
  labels <- trimws(labels)
  this_year <- as.integer(format(today, "%Y"))
  in_year <- function(year) {
    as.Date(paste(year, labels, sep = "/"), format = "%Y/%m/%d")
  }
  dates <- in_year(this_year)
  # The labels carry no year. A label that lands in the future (for example
  # "12/28" read in early January) or that does not exist in the current year
  # is re-read in the previous year. One day of slack is allowed in case the
  # page's day runs ahead of the local clock.
  previous <- in_year(this_year - 1L)
  rolled <- is.na(dates) | dates > today + 1
  dates[rolled] <- previous[rolled]
  dates
}

# Convert the SVG groups of one traffic graph into a long data frame with the
# columns type, date, metric and value; returns NULL when the graph is missing.
.extract_traffic_series <- function(graph, type) {
  # The groups are addressed by position below, so a node set with fewer than
  # five entries (e.g. the graph was not rendered) cannot be read.
  if (is.null(graph) || length(graph) < 5) {
    return(NULL)
  }
  tick_labels <- function(node) {
    vapply(XML::getNodeSet(node, "g"), XML::xmlValue, character(1))
  }
  circle_cy <- function(node) {
    as.numeric(vapply(
      XML::getNodeSet(node, "circle"),
      XML::xmlGetAttr,
      character(1),
      name = "cy",
      default = NA_character_
    ))
  }

  # Group 1 holds the date labels, groups 2 and 5 the tick labels used for the
  # total and unique series, groups 3 and 4 the plotted circles.
  yticks_total <- as.numeric(tick_labels(graph[[2]]))
  yticks_unique <- as.numeric(tick_labels(graph[[5]]))
  series <- data.frame(
    type = type,
    date = .parse_axis_dates(tick_labels(graph[[1]])),
    total = circle_cy(graph[[3]]),
    unique = circle_cy(graph[[4]]),
    stringsAsFactors = FALSE
  )
  # The circles only give pixel positions. The smallest `cy` is mapped to the
  # largest tick label and the largest `cy` to the smallest one (presumably
  # because SVG y coordinates grow downwards), so the values are an
  # approximation that assumes the points span the full tick range.
  series$total <- .rescale_linear(series$total, rev(range(yticks_total)))
  series$unique <- .rescale_linear(series$unique, rev(range(yticks_unique)))
  reshape2::melt(series, id.vars = c("type", "date"), variable.name = "metric")
}

# Log in through a Selenium-driven browser and collect, for every repository,
# the SVG groups of its clones and visitors graphs.
.scrape_traffic_graphs <- function(gh_user, gh_pass, gh_team, repos) {
  traffic_url <- "https://github.com/%s/%s/graphs/traffic"

  rD <- RSelenium::rsDriver(verbose = FALSE)
  remDr <- rD[["client"]]
  # Always shut the browser and the Selenium server down, even when a step
  # below fails; otherwise the server keeps its port occupied.
  on.exit(
    {
      # A failure to close the browser must not prevent stopping the server.
      try(remDr$close(), silent = TRUE)
      rD[["server"]]$stop()
    },
    add = TRUE
  )

  # Opening a traffic page first is what brings up the sign-in form
  # (presumably via a redirect for signed-out sessions).
  remDr$navigate(sprintf(traffic_url, gh_team, repos[1]))
  login_field <- remDr$findElement(using = "id", value = "login_field")
  login_field$setElementAttribute(attributeName = "value", value = gh_user)
  password_field <- remDr$findElement(using = "id", value = "password")
  password_field$setElementAttribute(attributeName = "value", value = gh_pass)
  # NOTE(review): positional XPath to the submit button; it depends on the
  # current markup of the sign-in form and may need updating.
  submit <- remDr$findElement(
    using = "xpath",
    '//*[@id="login"]/form/div[4]/input[3]'
  )
  submit$clickElement()
  Sys.sleep(1)

  graphs <- plyr::llply(repos, function(repo) {
    remDr$navigate(sprintf(traffic_url, gh_team, repo))
    # Fixed pause to give the page time to draw its graphs.
    Sys.sleep(1)
    # getPageSource() returns a list whose first element is the page text.
    page <- XML::htmlParse(remDr$getPageSource()[[1]], asText = TRUE)
    types <- c("clones", "visitors")
    lapply(setNames(nm = types), function(type) {
      XML::getNodeSet(
        page,
        sprintf('//*[@id="js-%s-graph"]/div/div[1]/svg/g/g', type)
      )
    })
  }, .progress = "text")
  names(graphs) <- repos
  graphs
}

#' Plot GitHub traffic of several repositories in one figure
#'
#' Signs in to GitHub through a Selenium-driven browser, opens the traffic
#' page of every repository in `repos`, reads the clones and visitors series
#' out of the page's SVG graphs and draws them together, one colour per
#' repository, faceted by graph type and metric (total / unique).
#'
#' @param gh_user GitHub login name entered into the sign-in form.
#' @param gh_pass GitHub password entered into the sign-in form.
#' @param gh_team Owner (team or account) used as the first path component of
#'   each repository URL; also shown in the plot title.
#' @param repos Character vector of repository names under `gh_team`.
#' @return A ggplot object.
github_traffic <- function(gh_user, gh_pass, gh_team, repos) {
  if (length(repos) == 0) {
    stop("`repos` must name at least one repository.", call. = FALSE)
  }

  graphs <- .scrape_traffic_graphs(gh_user, gh_pass, gh_team, repos)

  plot_data <- dplyr::bind_rows(
    lapply(graphs, function(repo_graphs) {
      dplyr::bind_rows(
        Map(.extract_traffic_series, repo_graphs, names(repo_graphs))
      )
    }),
    .id = "repo"
  )
  if (nrow(plot_data) == 0) {
    stop(
      "No traffic graphs could be read; check the login details and ",
      "repository names.",
      call. = FALSE
    )
  }
  # Keep the repositories in the order they were supplied.
  plot_data$repo <- factor(plot_data$repo, levels = unique(as.character(repos)))

  ggplot2::ggplot(
    plot_data,
    ggplot2::aes(x = date, y = value, colour = repo)
  ) +
    ggplot2::geom_point() +
    ggplot2::geom_line() +
    ggplot2::facet_grid(type ~ metric, scales = "free_y") +
    ggplot2::scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
    ggplot2::theme_bw() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90),
      legend.position = "top"
    ) +
    ggplot2::labs(title = sprintf("Github Team: %s", gh_team))
}
# Run the scraper with the settings defined above; the resulting plot is
# stored in `traffic_plot` rather than printed.
traffic_plot <- github_traffic(
  gh_user = gh_user,
  gh_pass = gh_pass,
  gh_team = gh_team,
  repos = repos
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment