# Scrape GitHub to combine your traffic across repositories into one plot
library(RSelenium)
library(XML)
library(ggplot2)
library(reshape2)
library(plyr)
library(dplyr)

# Settings passed to github_traffic() below. Replace each placeholder before
# running the script.
# NOTE(review): avoid committing a real password; consider reading it from an
# environment variable with Sys.getenv() instead of hard-coding it here.
gh_user <- '<your github login name>'
gh_pass <- '<your github login password>'
gh_team <- '<team associated with account>'
# Repository names under gh_team; github_traffic() iterates over this value,
# so it may be a character vector with several names.
repos <- '<repositories in team>'
# Failsafe: if the function fails and you need to close the port manually, run:
# rD <- rsDriver(verbose = FALSE, port = 4444L)
# remDr <- rD$client
# remDr$close()
#' Combine the GitHub traffic graphs of several repositories into one plot
#'
#' Starts a Selenium session, logs in to GitHub, opens the traffic page of
#' every repository in `repos`, reads the "clones" and "visitors" SVG graphs
#' from the page source and returns one ggplot comparing the repositories.
#'
#' @param gh_user GitHub login name typed into the login form.
#' @param gh_pass GitHub login password typed into the login form.
#' @param gh_team Account/team that owns the repositories (first URL segment).
#' @param repos Character vector of repository names under `gh_team`.
#' @return A ggplot object: one coloured line per repository, faceted by graph
#'   type (rows) and metric `total`/`unique` (columns).
github_traffic <- function(gh_user, gh_pass, gh_team, repos) {
  if (length(repos) == 0) {
    stop("`repos` must name at least one repository.", call. = FALSE)
  }

  # URL of the traffic page for one repository.
  traffic_url <- function(repo) {
    sprintf('https://github.com/%s/%s/graphs/traffic', gh_team, repo)
  }

  # Text of every node matched by `path` below `node`.
  node_text <- function(node, path) {
    unlist(lapply(XML::getNodeSet(node, path), XML::xmlValue))
  }

  # Vertical pixel position (`cy`) of every <circle> below `node`.
  circle_cy <- function(node) {
    circles <- XML::getNodeSet(node, 'circle')
    as.numeric(unlist(lapply(circles, XML::xmlGetAttr, name = 'cy')))
  }

  # Linearly map the observed range of `x` onto the interval `to`.
  # The original code called an unqualified rescale(), which none of the
  # packages attached by this script appear to export (presumably
  # scales::rescale was intended), so the mapping is done in base R here.
  rescale_to <- function(x, to) {
    if (!any(is.finite(x))) {
      return(x)
    }
    from <- range(x, finite = TRUE)
    if (from[1] == from[2]) {
      # A flat series has no range to stretch; place it mid-interval.
      return(ifelse(is.na(x), NA_real_, mean(to)))
    }
    (x - from[1]) / (from[2] - from[1]) * (to[2] - to[1]) + to[1]
  }

  # Parse "%m/%d" axis labels. The labels carry no year, so the current year
  # is tried first; labels that would then lie in the future (e.g. December
  # labels read in January) or fail to parse are moved to the previous year.
  # A few days of slack avoids rolling back labels that are only "ahead"
  # because of a time-zone difference.
  parse_month_day <- function(labels, today = Sys.Date(), slack = 7) {
    labels <- trimws(labels)
    if (length(labels) == 0) {
      return(as.Date(character(0)))
    }
    this_year <- as.integer(format(today, '%Y'))
    current <- as.Date(paste(this_year, labels, sep = '/'),
                       format = '%Y/%m/%d')
    previous <- as.Date(paste(this_year - 1L, labels, sep = '/'),
                        format = '%Y/%m/%d')
    roll_back <- is.na(current) | current > today + slack
    current[roll_back] <- previous[roll_back]
    current
  }

  # Turn the SVG groups of one graph into long-format rows, or NULL when the
  # graph was not found on the page. The code reads the groups positionally:
  # 1 = x-axis labels, 2 = ticks used for `total`, 3 = `total` points,
  # 4 = `unique` points, 5 = ticks used for `unique`.
  graph_to_long <- function(repo, type, graph) {
    # getNodeSet() yields an empty node set (not NULL) when nothing matches.
    if (length(graph) < 5) {
      return(NULL)
    }
    dates <- parse_month_day(node_text(graph[[1]], 'g'))
    if (length(dates) == 0) {
      return(NULL)
    }
    yticks_total <- as.numeric(node_text(graph[[2]], 'g'))
    yticks_unique <- as.numeric(node_text(graph[[5]], 'g'))
    total_cy <- circle_cy(graph[[3]])
    unique_cy <- circle_cy(graph[[4]])
    if (length(total_cy) != length(dates) ||
        length(unique_cy) != length(dates)) {
      stop(sprintf("Unexpected '%s' graph layout for repository '%s'.",
                   type, repo), call. = FALSE)
    }
    # SVG y grows downwards, hence the reversed tick range as target.
    # NOTE(review): this pins the extreme points to the extreme axis ticks,
    # i.e. it assumes the data span the whole tick range - confirm.
    wide <- data.frame(
      repo = repo,
      type = type,
      date = dates,
      total = rescale_to(total_cy, rev(range(yticks_total))),
      unique = rescale_to(unique_cy, rev(range(yticks_unique))),
      stringsAsFactors = FALSE
    )
    reshape2::melt(wide, id.vars = c('repo', 'type', 'date'),
                   variable.name = 'metric')
  }

  rD <- RSelenium::rsDriver(verbose = FALSE)
  remDr <- rD[["client"]]
  # Always release the browser and the Selenium server, even when a step
  # below fails; otherwise the port stays occupied.
  on.exit({
    try(remDr$close(), silent = TRUE)
    try(rD[["server"]]$stop(), silent = TRUE)
  }, add = TRUE)

  # Opening a traffic page while logged out leads to the login form.
  remDr$navigate(traffic_url(repos[1]))
  login_field <- remDr$findElement(using = 'id', value = "login_field")
  login_field$setElementAttribute(attributeName = 'value', value = gh_user)
  password_field <- remDr$findElement(using = 'id', value = "password")
  password_field$setElementAttribute(attributeName = 'value', value = gh_pass)
  submit_button <- remDr$findElement(
    using = 'xpath',
    value = '//*[@id="login"]/form/div[4]/input[3]'
  )
  submit_button$clickElement()
  Sys.sleep(1)

  graph_types <- stats::setNames(nm = c('clones', 'visitors'))
  graphs <- plyr::llply(repos, function(repo) {
    remDr$navigate(traffic_url(repo))
    # Give the page a moment to render the SVG graphs.
    Sys.sleep(1)
    # getPageSource() returns a list; its first element is the HTML string.
    page <- XML::htmlParse(remDr$getPageSource()[[1]], asText = TRUE)
    lapply(graph_types, function(type) {
      XML::getNodeSet(
        page,
        sprintf('//*[@id="js-%s-graph"]/div/div[1]/svg/g/g', type)
      )
    })
  }, .progress = 'text')

  pieces <- list()
  for (i in seq_along(repos)) {
    for (type in names(graphs[[i]])) {
      piece <- graph_to_long(repos[[i]], type, graphs[[i]][[type]])
      if (!is.null(piece)) {
        pieces[[length(pieces) + 1L]] <- piece
      }
    }
  }
  if (length(pieces) == 0) {
    stop("No traffic graphs could be read for the requested repositories.",
         call. = FALSE)
  }
  plot_data <- do.call(rbind, pieces)
  # Keep the legend in the order the repositories were given.
  plot_data$repo <- factor(plot_data$repo, levels = unique(repos))

  ggplot(plot_data, aes(x = date, y = value, colour = repo)) +
    geom_point() +
    geom_line() +
    facet_grid(type ~ metric, scales = 'free_y') +
    scale_x_date(date_breaks = "1 day", date_labels = "%m/%d") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 90), legend.position = 'top') +
    labs(title = sprintf('Github Team: %s', gh_team))
}
# Scrape the configured repositories and keep the resulting ggplot object;
# print(traffic_plot) displays it.
traffic_plot <- github_traffic(
  gh_user = gh_user,
  gh_pass = gh_pass,
  gh_team = gh_team,
  repos = repos
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment