Created
October 8, 2016 06:39
-
-
Save mkiang/ea3fbe313bc1acc73a7b5f9c5615d855 to your computer and use it in GitHub Desktop.
A visual tour of my publications
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Code for this blog post: | |
## http://mathewkiang.com/2016/10/08/a-visual-tour-of-my-publications/ | |
## Imports | |
library(RColorBrewer) | |
library(ggplot2) | |
library(dplyr) | |
library(devtools) | |
devtools::install_github("jkeirstead/scholar") | |
library(scholar) | |
## Helper function | |
## Collect the citation history of every publication of one author.
##
## Arguments:
##   id           Author id, passed unchanged to get_publications() and
##                get_article_cite_history().
##   sleep        Mean pause between consecutive history requests, in
##                minutes. The actual pause is drawn from a normal
##                distribution with sd = 25% of `sleep`. 0 disables pausing.
##   return_pubs  If TRUE, return list(pubs = <publications>, cites = <history>)
##                instead of the history alone.
##   debug        If TRUE, print a progress line before each request.
##
## Returns the row-bound results of get_article_cite_history() for every
## publication with at least one citation, followed by the
## (year, cites, pubid) rows of publications that have zero citations.
get_all_article_history <- function(id, sleep = 0, return_pubs = FALSE,
                                    debug = FALSE) {
  ## Get publications and unique articles (with citations). Rows with a
  ## missing citation count are treated as neither cited nor uncited, so an
  ## NA never becomes an article id or an `if (NA)` condition.
  pubs <- get_publications(id)
  is_cited <- !is.na(pubs$cites) & pubs$cites > 0
  is_uncited <- !is.na(pubs$cites) & pubs$cites == 0
  articles <- as.character(unique(pubs$pubid[is_cited]))
  npubs <- length(articles)

  ## Fetch each history into a preallocated list and bind once at the end,
  ## rather than growing a data frame on every iteration.
  histories <- vector("list", npubs)
  for (i in seq_along(articles)) {
    pub <- articles[[i]]
    if (debug) {
      print(paste0("(", i, " of ", npubs, ") ", pub, " | ", Sys.time()))
    }

    histories[[i]] <- get_article_cite_history(id = id, article = pub)

    ## Google Scholar doesn't have a real API. To avoid violating TOS, we
    ## space out requests by a randomised number of minutes. abs() guards
    ## against the unlikely negative draw. There is nothing to wait for
    ## after the final request, so the pause is skipped there.
    if (sleep > 0 && i < npubs) {
      Sys.sleep(60 * abs(rnorm(1, sleep, .25 * sleep)))
    }
  }
  holder <- do.call(rbind, histories)

  ## Publications without citations have no history to fetch; represent each
  ## with its own (year, cites, pubid) row.
  if (any(is_uncited)) {
    holder <- rbind(holder, pubs[is_uncited, c("year", "cites", "pubid")])
  }

  ## Return both the publication list and the citation history on request
  if (return_pubs) {
    return(list(pubs = pubs, cites = holder))
  }
  holder
}
## Google Scholar id of the author being analysed
mvk <- "eD9_J3wAAAAJ"

## Pull the publication list together with every article's citation history
mk <- get_all_article_history(mvk, return_pubs = TRUE)

## Hand-assigned topic code for each publication, written as id = code pairs
## so an id and its code cannot drift out of alignment.
## Note: All publications as of 10/6/2016
ptypes <- c(
  "d1gkVwhDpl0C" = "MSM/Drugs",
  "u-x6o8ySG0sC" = "MSM/Drugs",
  "qjMakFHDy7sC" = "MSM/Drugs",
  "9yKSN-GCB0IC" = "HPM",
  "YsMSGLbcyi4C" = "Inequality",
  "Y0pCki6q_DkC" = "Inequality",
  "zYLM7Y9cAGgC" = "HPM",
  "u5HHmVD_uO8C" = "MSM/Drugs",
  "Tyk-4Ss8FVUC" = "MSM/Drugs",
  "eQOLeE2rZwMC" = "Inequality",
  "IjCSPb-OGe4C" = "MSM/Drugs",
  "2osOgNQ5qMEC" = "MSM/Drugs",
  "L_l9e5I586QC" = "Inequality",
  "hqOjcs7Dif8C" = "Inequality",
  "LkGwnXOMwfcC" = "HPM",
  "WF5omc3nYNoC" = "HPM",
  "__bU50VfleQC" = "DP",
  "roLk4NBRz8UC" = "Inequality",
  "_FxGoFyzp5QC" = "Inequality",
  "D_tqNUsBuKoC" = "Inequality",
  "tHtfpZlB6tUC" = "HPM",
  "c1e4I3QdEKYC" = "HPM"
)
## Split the pairs back into two plain, unnamed parallel vectors
pubid <- names(ptypes)
ptypes <- unname(ptypes)

## Lookup table of pubid -> ordered factor with descriptive labels
pubtypes <- data.frame(pubid = pubid, pubtype = ptypes)
pubtypes$pubtype <- factor(
  pubtypes$pubtype,
  levels = c("DP", "Inequality", "HPM", "MSM/Drugs"),
  labels = c(
    "Digital Phenotyping",
    "Health Inequalities",
    "Health Policy and Management",
    "MSM / HIV / Drugs"
  ),
  ordered = TRUE
)

## Attach the type to each publication; merge() keeps only publications
## whose pubid appears in the lookup table.
mkpubs <- merge(mk$pubs, pubtypes, by = "pubid")
## Rename the publication-year column so it does not collide with the
## citation-year column in the merge below. Match the name exactly instead
## of by pattern so no other column containing "year" gets renamed.
names(mkpubs)[names(mkpubs) == "year"] <- "pubyear"

## One row per (article, citation year), with the article's publication year
## and type attached.
mkcites <- merge(mk$cites, select(mkpubs, pubid, pubyear, pubtype),
                 by = "pubid")

## Age of the article at each citation year; a citation year earlier than
## the publication year is clamped to age 0.
mkcites$delta <- pmax(mkcites$year - mkcites$pubyear, 0)

## Get cumulative sum -- remove NAs.
## Uses plain assignment with %>%: the compound-assignment pipe %<>% lives in
## magrittr, which this script never attaches. The grouping is dropped again
## so later steps start from an ungrouped table.
mkcites <- mkcites %>%
  group_by(pubid) %>%
  arrange(year) %>%
  mutate(ccite = cumsum(cites)) %>%
  ungroup() %>%
  filter(!is.na(pubyear))

## If an article's delta doesn't start at zero, add a zero-citation baseline
## row at its publication year. The baseline rows are derived from each
## affected article's first row so column types (pubid, pubtype) match, and
## they are appended in a single bind_rows() instead of growing the table
## inside a loop.
zero_rows <- mkcites %>%
  group_by(pubid) %>%
  filter(min(delta, na.rm = TRUE) != 0) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(year = pubyear, cites = 0, delta = 0, ccite = 0)
mkcites <- bind_rows(mkcites, zero_rows)
## Feature image: cumulative citations of every article by calendar year,
## drawn in grey on top of a single linear fit.
p0 <- ggplot(mkcites, aes(x = year, y = ccite)) +
  ## Linear fit over all rows (this layer sets no grouping aesthetic)
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    colour = "gray60", size = 1.5, alpha = 0.15
  ) +
  ## One trajectory (points + connecting line) per article
  geom_point(aes(group = pubid), colour = "grey30", size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), colour = "grey30", size = 0.75, alpha = 0.75) +
  labs(title = "Citation history over time") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic()

ggsave(
  filename = "./feature_img.jpg", plot = p0,
  width = 7, height = 4, scale = 1.2
)
## Cumulative citations of each article against the article's age (years
## since publication), coloured by article type.
p1 <- ggplot(mkcites, aes(x = delta, y = ccite, colour = pubtype)) +
  ## Fitted lines drawn in a fixed grey; the constant colour overrides the
  ## mapped one for this layer
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    colour = "gray60", size = 1.5, alpha = 0.5
  ) +
  ## One trajectory per article
  geom_point(aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), size = 0.75, alpha = 0.75) +
  labs(title = "Citation trajectory of my articles") +
  scale_colour_brewer(name = "Article Type", type = "qual",
                      palette = "Dark2") +
  scale_x_continuous(name = "Years from publication",
                     expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_shift.jpg", plot = p1,
  width = 7, height = 4, scale = 1.2
)
## Cumulative citations of each article by calendar year, coloured by
## article type.
p2 <- ggplot(mkcites, aes(x = year, y = ccite, colour = pubtype)) +
  ## Fitted lines drawn in a fixed grey; the constant colour overrides the
  ## mapped one for this layer
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    colour = "gray60", size = 1.5, alpha = 0.85
  ) +
  ## One trajectory per article
  geom_point(aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(aes(group = pubid), size = 0.75, alpha = 0.75) +
  labs(title = "Citation history over time") +
  scale_colour_brewer(name = "Article Type", type = "qual",
                      palette = "Dark2") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_traj.jpg", plot = p2,
  width = 7, height = 4, scale = 1.2
)
## Calendar-year view again, but the individual trajectories are faded into
## the background and one linear fit is drawn per article type.
p3 <- ggplot(mkcites, aes(x = year, y = ccite)) +
  ## Faded per-article trajectories
  geom_point(aes(group = pubid), colour = "grey75", size = 1, alpha = 0.25) +
  geom_line(aes(group = pubid), colour = "grey75", size = 0.75,
            alpha = 0.25) +
  ## One fitted line per article type
  geom_line(
    aes(colour = pubtype),
    stat = "smooth", method = "lm", se = FALSE,
    size = 1.5, alpha = 0.9
  ) +
  labs(title = "Citation history, fitted by type, over time") +
  scale_colour_brewer(name = "Article Type", type = "qual",
                      palette = "Dark2") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  ## NOTE(review): limits set on a scale remove observations outside
  ## [0, 25] before the fit is computed -- confirm this is intended rather
  ## than a pure zoom.
  scale_y_continuous(name = "Citations", expand = c(0, 0.15),
                     limits = c(0, 25)) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_lm.jpg", plot = p3,
  width = 7, height = 4, scale = 1.2
)
## Article-age view again, but the individual trajectories are faded into
## the background and one linear fit is drawn per article type.
p4 <- ggplot(mkcites, aes(x = delta, y = ccite, colour = pubtype)) +
  ## Faded per-article trajectories; the constant grey overrides the mapped
  ## colour for these two layers
  geom_point(aes(group = pubid), colour = "grey75", size = 1, alpha = 0.25) +
  geom_line(aes(group = pubid), colour = "grey75", size = 0.75,
            alpha = 0.25) +
  ## One fitted line per article type
  geom_line(
    aes(colour = pubtype),
    stat = "smooth", method = "lm", se = FALSE,
    size = 1.5, alpha = 0.9
  ) +
  labs(title = "Citation history, fitted by type, over article's age") +
  scale_colour_brewer(name = "Article Type", type = "qual",
                      palette = "Dark2") +
  scale_x_continuous(name = "Years from publication",
                     expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_shift_lm.jpg", plot = p4,
  width = 7, height = 4, scale = 1.2
)
## Reshape dataframe for stacked barcharts: one row per (year, article type).
## summarize() only peels off the last grouping variable, so each result is
## explicitly ungrouped rather than left grouped by year.

## Citations received in each year, summed within article type
mkstack <- mkcites %>%
  group_by(year, pubtype) %>%
  summarize(total = sum(cites)) %>%
  ungroup()

## Sum of the per-article cumulative citation counts, per year and type
mkstackc <- mkcites %>%
  group_by(year, pubtype) %>%
  summarize(total = sum(ccite)) %>%
  ungroup()
## Stacked bars: citations received in each year, split by article type.
p5 <- ggplot(mkstack, aes(x = year, y = total, fill = pubtype)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  ## White rules at every tenth citation, drawn over the bars
  geom_hline(
    yintercept = seq(from = 0, to = 65, by = 10),
    colour = "white", alpha = 0.75
  ) +
  labs(title = "Citations per year by article type") +
  scale_fill_brewer(name = "Article Type", type = "qual",
                    palette = "Dark2") +
  scale_x_continuous(
    name = "", expand = c(0, 0.02),
    breaks = 2010:2016, labels = 2010:2016
  ) +
  scale_y_continuous(
    name = "Citations", expand = c(0, 0.15),
    breaks = seq(from = 0, to = 65, by = 10),
    labels = seq(from = 0, to = 65, by = 10)
  ) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_bar.jpg", plot = p5,
  width = 4, height = 4, scale = 1.2
)
## Stacked bars: cumulative citations by year, split by article type.
p6 <- ggplot(mkstackc, aes(x = year, y = total, fill = pubtype)) +
  geom_bar(stat = "identity", alpha = 0.9) +
  ## White rules at every 25 citations, drawn over the bars
  geom_hline(
    yintercept = seq(from = 0, to = 200, by = 25),
    colour = "white", alpha = 0.75
  ) +
  labs(title = "Cumulative citations per year by article type") +
  scale_fill_brewer(name = "Article Type", type = "qual",
                    palette = "Dark2") +
  scale_x_continuous(
    name = "", expand = c(0, 0.02),
    breaks = 2010:2016, labels = 2010:2016
  ) +
  scale_y_continuous(
    name = "Cumulative citations", expand = c(0, 0.15),
    breaks = seq(from = 0, to = 200, by = 25),
    labels = seq(from = 0, to = 200, by = 25)
  ) +
  theme_classic() +
  ## Anchor the legend's top-left corner to the panel's top-left corner
  theme(
    legend.justification = c(0, 1),
    legend.position = c(0, 1)
  )

ggsave(
  filename = "./mk_bar_cum.jpg", plot = p6,
  width = 4, height = 4, scale = 1.2
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment