## A visual tour of my publications
## Code for this blog post:
## http://mathewkiang.com/2016/10/08/a-visual-tour-of-my-publications/
## Imports
library(RColorBrewer) | |
library(ggplot2) | |
library(dplyr) | |
library(devtools) | |
devtools::install_github("jkeirstead/scholar") | |
library(scholar) | |
## Helper function | |
## Collect the per-year citation history of every publication on a Google
## Scholar profile.
##
## Arguments:
##   id           Scholar author id; passed to get_publications() and
##                get_article_cite_history().
##   sleep        Mean pause between successive requests, in MINUTES. The
##                actual pause is 60 * |N(sleep, .25 * sleep)| seconds. The
##                default of 0 disables pausing.
##   return_pubs  If TRUE, return list(pubs = <publication table>,
##                cites = <citation history>) instead of the history alone.
##   debug        If TRUE, print one progress line before each request.
##
## Returns:
##   The row-bound results of get_article_cite_history() for every publication
##   with at least one citation, followed by the year/cites/pubid rows of the
##   publications with zero citations (or the list described above). NULL is
##   returned for the history when there is nothing to bind.
##
## Publications whose citation count is missing (NA) are neither requested nor
## appended; previously an NA count produced an NA article id and made the
## zero-citation check fail.
get_all_article_history <- function(id, sleep = 0, return_pubs = FALSE,
                                    debug = FALSE) {
  pubs <- get_publications(id)

  ## NA-safe split of the publication table
  has_cites <- !is.na(pubs$cites) & pubs$cites > 0
  no_cites <- !is.na(pubs$cites) & pubs$cites == 0

  articles <- as.character(unique(pubs$pubid[has_cites]))
  npubs <- length(articles)

  ## One slot per article; bound together once after the loop instead of
  ## re-copying the accumulated result on every iteration.
  histories <- vector("list", npubs)
  for (i in seq_along(articles)) {
    pub <- articles[i]
    if (debug) {
      print(paste0("(", i, " of ", npubs, ") ", pub, " | ", Sys.time()))
    }
    histories[[i]] <- get_article_cite_history(id = id, article = pub)

    ## Google Scholar doesn't have a real API. To avoid violating TOS, requests
    ## are spaced out by a randomised number of minutes (abs() guards against
    ## the unlikely negative draw). No pause is needed after the last request.
    if (sleep > 0 && i < npubs) {
      sleeptime <- 60 * abs(rnorm(1, sleep, .25 * sleep))
      Sys.sleep(sleeptime)
    }
  }
  holder <- do.call(rbind, histories)

  ## Uncited publications have no history to download; append them as-is.
  if (any(no_cites)) {
    holder <- rbind(holder, pubs[no_cites, c("year", "cites", "pubid")])
  }

  ## Optionally return the publication table alongside the citation history
  if (return_pubs) {
    return(list(pubs = pubs, cites = holder))
  }
  holder
}
## Google Scholar id of the author whose profile is analysed
mvk <- "eD9_J3wAAAAJ"

## Download the publication table together with the citation history
mk <- get_all_article_history(mvk, return_pubs = TRUE)

## Hand-coded topic of each publication, keyed by Scholar publication id.
## Note: All publications as of 10/6/2016
mkpubs <- mk[["pubs"]]
ptypes <- c(
  "d1gkVwhDpl0C" = "MSM/Drugs",
  "u-x6o8ySG0sC" = "MSM/Drugs",
  "qjMakFHDy7sC" = "MSM/Drugs",
  "9yKSN-GCB0IC" = "HPM",
  "YsMSGLbcyi4C" = "Inequality",
  "Y0pCki6q_DkC" = "Inequality",
  "zYLM7Y9cAGgC" = "HPM",
  "u5HHmVD_uO8C" = "MSM/Drugs",
  "Tyk-4Ss8FVUC" = "MSM/Drugs",
  "eQOLeE2rZwMC" = "Inequality",
  "IjCSPb-OGe4C" = "MSM/Drugs",
  "2osOgNQ5qMEC" = "MSM/Drugs",
  "L_l9e5I586QC" = "Inequality",
  "hqOjcs7Dif8C" = "Inequality",
  "LkGwnXOMwfcC" = "HPM",
  "WF5omc3nYNoC" = "HPM",
  "__bU50VfleQC" = "DP",
  "roLk4NBRz8UC" = "Inequality",
  "_FxGoFyzp5QC" = "Inequality",
  "D_tqNUsBuKoC" = "Inequality",
  "tHtfpZlB6tUC" = "HPM",
  "c1e4I3QdEKYC" = "HPM"
)
## Split the lookup back into the two parallel vectors
pubid <- names(ptypes)
ptypes <- unname(ptypes)

## Lookup table: short topic codes become an ordered factor with
## descriptive labels
pubtypes <- data.frame(
  pubid = pubid,
  pubtype = factor(
    ptypes,
    levels = c("DP", "Inequality", "HPM", "MSM/Drugs"),
    labels = c(
      "Digital Phenotyping",
      "Health Inequalities",
      "Health Policy and Management",
      "MSM / HIV / Drugs"
    ),
    ordered = TRUE
  )
)

## Attach the topic to every publication via its id
mkpubs <- merge(x = mkpubs, y = pubtypes, by = "pubid")
## The publication table and the citation history both carry a `year` column.
## Rename the publication one (exact match only) so the two stay distinct
## after merging.
names(mkpubs)[names(mkpubs) == "year"] <- "pubyear"

## One row per publication and citation year, with the publication year and
## topic attached
mkcites <- mk$cites
mkcites <- merge(mkcites, select(mkpubs, pubid, pubyear, pubtype),
                 by = "pubid")

## Age of the article when cited, floored at zero (citations dated before the
## publication year count as age 0)
mkcites$delta <- pmax(mkcites$year - mkcites$pubyear, 0)

## Running citation total per publication, in year order. A plain assignment
## is used instead of `%<>%`, which is a magrittr operator that none of the
## packages loaded by this script export. The grouping is dropped afterwards
## so it does not leak into later steps.
mkcites <- mkcites %>%
  group_by(pubid) %>%
  arrange(year) %>%
  mutate(ccite = cumsum(cites)) %>%
  ungroup()

## Drop publications without a publication year
mkcites <- mkcites[!is.na(mkcites$pubyear), ]
## Make every article's trajectory start at delta == 0: for each publication
## whose smallest delta is not zero, append one row dated at its publication
## year with zero citations (cites, delta and ccite all 0).
##
## The rows are built in one pass and bound once, instead of converting the
## table back and forth and calling rbind() for every publication.
mkcites <- as.data.frame(mkcites)  # plain data.frame so base rbind() applies
pub_key <- as.character(mkcites$pubid)
first_delta <- tapply(mkcites$delta, pub_key, min, na.rm = TRUE)
late_starters <- names(first_delta)[first_delta != 0]

if (length(late_starters) > 0) {
  ## Start from each publication's first row so pubid, pubyear and pubtype
  ## keep their existing column types, then overwrite the remaining fields.
  zero_rows <- mkcites[match(late_starters, pub_key), , drop = FALSE]
  zero_rows$year <- zero_rows$pubyear
  zero_rows$cites <- 0
  zero_rows$delta <- 0
  zero_rows$ccite <- 0
  mkcites <- rbind(mkcites, zero_rows)
}

## as_tibble() replaces the deprecated as_data_frame()
mkcites <- as_tibble(mkcites)
## Feature image: cumulative citations by calendar year, one grey trajectory
## per publication, with a faint linear-fit line drawn first.
p0 <- ggplot(mkcites, aes(x = year, y = ccite)) +
  ## Layers (draw order matters)
  geom_line(stat = "smooth", method = "lm", se = FALSE,
            colour = "gray60", size = 1.5, alpha = 0.15) +
  geom_point(aes(group = pubid), colour = "grey30", size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), colour = "grey30", size = 0.75, alpha = 0.75) +
  ## Axes and title
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  ggtitle("Citation history over time") +
  theme_classic()
ggsave(filename = "./feature_img.jpg", plot = p0,
       width = 7, height = 4, scale = 1.2)
## Cumulative citations against article age (years since publication), one
## trajectory per publication coloured by article type, with the legend
## anchored in the top-left corner.
p1 <- ggplot(mkcites, aes(x = delta, y = ccite, color = pubtype)) +
  ## Layers (draw order matters)
  geom_line(stat = "smooth", method = "lm", se = FALSE,
            colour = "gray60", size = 1.5, alpha = 0.5) +
  geom_point(aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), size = 0.75, alpha = 0.75) +
  ## Scales and title
  scale_x_continuous(name = "Years from publication", expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  scale_color_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Citation trajectory of my articles") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_traj_shift.jpg", plot = p1,
       width = 7, height = 4, scale = 1.2)
## Cumulative citations by calendar year, one trajectory per publication
## coloured by article type, with the legend anchored in the top-left corner.
p2 <- ggplot(mkcites, aes(x = year, y = ccite, color = pubtype)) +
  ## Layers (draw order matters)
  geom_line(stat = "smooth", method = "lm", se = FALSE,
            colour = "gray60", size = 1.5, alpha = 0.85) +
  geom_point(aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), size = 0.75, alpha = 0.75) +
  ## Scales and title
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  scale_color_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Citation history over time") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_traj.jpg", plot = p2,
       width = 7, height = 4, scale = 1.2)
## Calendar-year view with the individual trajectories faded to grey and a
## linear-fit line coloured by article type drawn on top. The y axis is
## limited to 0-25.
p3 <- ggplot(mkcites, aes(x = year, y = ccite)) +
  ## Faded per-publication trajectories
  geom_point(aes(group = pubid), colour = "grey75", size = 1, alpha = 0.25) +
  geom_line(aes(group = pubid), colour = "grey75", size = 0.75, alpha = 0.25) +
  ## Linear-fit line, coloured by article type
  geom_line(aes(color = pubtype), stat = "smooth", method = "lm", se = FALSE,
            size = 1.5, alpha = 0.9) +
  ## Scales and title
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15),
                     limits = c(0, 25)) +
  scale_color_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Citation history, fitted by type, over time") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_traj_lm.jpg", plot = p3,
       width = 7, height = 4, scale = 1.2)
## Article-age view with the individual trajectories faded to grey and a
## linear-fit line coloured by article type drawn on top.
p4 <- ggplot(mkcites, aes(x = delta, y = ccite, color = pubtype)) +
  ## Faded per-publication trajectories
  geom_point(aes(group = pubid), colour = "grey75", size = 1, alpha = 0.25) +
  geom_line(aes(group = pubid), colour = "grey75", size = 0.75, alpha = 0.25) +
  ## Linear-fit line, coloured by article type
  geom_line(aes(color = pubtype), stat = "smooth", method = "lm", se = FALSE,
            size = 1.5, alpha = 0.9) +
  ## Scales and title
  scale_x_continuous(name = "Years from publication", expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  scale_color_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Citation history, fitted by type, over article's age") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_traj_shift_lm.jpg", plot = p4,
       width = 7, height = 4, scale = 1.2)
## Aggregate to one row per calendar year and article type for the stacked
## bar charts.
## mkstack: sum of the yearly citation counts (`cites`)
mkstack <- summarize(
  group_by(mkcites, year, pubtype),
  total = sum(cites)
)
## mkstackc: sum of the running per-publication totals (`ccite`)
mkstackc <- summarize(
  group_by(mkcites, year, pubtype),
  total = sum(ccite)
)
## Stacked bars of citations received per year, filled by article type. White
## horizontal rules every 10 citations act as grid lines over the bars.
p5 <- ggplot(mkstack, aes(x = year, y = total, fill = pubtype)) +
  ## Layers (draw order matters: rules go on top of the bars)
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_hline(yintercept = seq(from = 0, to = 65, by = 10),
             colour = "white", alpha = 0.75) +
  ## Scales and title
  scale_x_continuous(name = "", expand = c(0, 0.02),
                     breaks = 2010:2016, labels = 2010:2016) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15),
                     breaks = seq(from = 0, to = 65, by = 10),
                     labels = seq(from = 0, to = 65, by = 10)) +
  scale_fill_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Citations per year by article type") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_bar.jpg", plot = p5,
       width = 4, height = 4, scale = 1.2)
## Stacked bars of the summed running citation totals per year, filled by
## article type. White horizontal rules every 25 citations act as grid lines
## over the bars.
p6 <- ggplot(mkstackc, aes(x = year, y = total, fill = pubtype)) +
  ## Layers (draw order matters: rules go on top of the bars)
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_hline(yintercept = seq(from = 0, to = 200, by = 25),
             colour = "white", alpha = 0.75) +
  ## Scales and title
  scale_x_continuous(name = "", expand = c(0, 0.02),
                     breaks = 2010:2016, labels = 2010:2016) +
  scale_y_continuous(name = "Cumulative citations", expand = c(0, 0.15),
                     breaks = seq(from = 0, to = 200, by = 25),
                     labels = seq(from = 0, to = 200, by = 25)) +
  scale_fill_brewer("Article Type", type = "qual", palette = "Dark2") +
  ggtitle("Cumulative citations per year by article type") +
  ## Theme: base theme first, then the legend tweaks on top of it
  theme_classic() +
  theme(legend.justification = c(0, 1),
        legend.position = c(0, 1))
ggsave(filename = "./mk_bar_cum.jpg", plot = p6,
       width = 4, height = 4, scale = 1.2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment