Skip to content

Instantly share code, notes, and snippets.

@mkiang
Created October 8, 2016 06:39
Show Gist options
  • Save mkiang/ea3fbe313bc1acc73a7b5f9c5615d855 to your computer and use it in GitHub Desktop.
Save mkiang/ea3fbe313bc1acc73a7b5f9c5615d855 to your computer and use it in GitHub Desktop.
A visual tour of my publications
## Code for this blog post:
## http://mathewkiang.com/2016/10/08/a-visual-tour-of-my-publications/
## Imports
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(devtools)
devtools::install_github("jkeirstead/scholar")
library(scholar)
## Helper function
##
## Download the per-year citation history of every publication of one
## Google Scholar author.
##
## Arguments:
##   id          Google Scholar author id, passed to get_publications() and
##               get_article_cite_history().
##   sleep       Average pause, in minutes, between two history requests.
##               0 (the default) disables throttling.
##   return_pubs If TRUE, return list(pubs = <publication table>,
##               cites = <citation history>); otherwise return only the
##               citation history.
##   debug       If TRUE, emit one progress message per requested article.
##
## Value:
##   The row-bound citation histories of all publications with at least one
##   citation, followed by the (year, cites, pubid) rows of the publications
##   with zero citations. NULL when there is nothing to bind.
get_all_article_history <- function(id, sleep = 0, return_pubs = FALSE,
                                    debug = FALSE) {
  ## Get publications and unique articles (with citations).
  ## which() ignores NA citation counts, so an NA never turns into a bogus
  ## article id or into an NA condition further down.
  pubs <- get_publications(id)
  cited <- which(pubs$cites > 0)
  uncited <- which(pubs$cites == 0)
  articles <- as.character(unique(pubs$pubid[cited]))
  npubs <- length(articles)

  ## Collect each history in a preallocated list and bind once at the end
  ## instead of re-copying the accumulated table on every iteration.
  histories <- vector("list", npubs)
  for (i in seq_along(articles)) {
    pub <- articles[i]
    if (debug) {
      message("(", i, " of ", npubs, ") ", pub, " | ", Sys.time())
    }
    histories[[i]] <- get_article_cite_history(id = id, article = pub)

    ## Google Scholar doesn't have a real API. To avoid violating TOS,
    ## we space out requests by a specified number of minutes (20 - 30)
    ## and add some randomness. (abs() controls for unlikely event of
    ## negative number.) No pause is needed after the final request.
    if (sleep > 0 && i < npubs) {
      Sys.sleep(60 * abs(rnorm(1, sleep, .25 * sleep)))
    }
  }
  holder <- do.call(rbind, histories)

  ## Publications without citations have no history to download; represent
  ## each of them by its row from the publication table.
  if (length(uncited) > 0) {
    holder <- rbind(holder, pubs[uncited, c("year", "cites", "pubid")])
  }

  ## Return both the author history and the citation history
  if (return_pubs) {
    return(list(pubs = pubs, cites = holder))
  }
  holder
}
## Google Scholar id of the author whose record is plotted
mvk <- "eD9_J3wAAAAJ"

## Download the publication table together with the citation histories
mk <- get_all_article_history(id = mvk, return_pubs = TRUE)

## Hand-coded topic of every publication.
## Note: All publications as of 10/6/2016
## `pubid` and `ptypes` are parallel vectors: element i of `ptypes` is the
## topic code of element i of `pubid`.
pubid <- c(
  "d1gkVwhDpl0C", "u-x6o8ySG0sC", "qjMakFHDy7sC", "9yKSN-GCB0IC",
  "YsMSGLbcyi4C", "Y0pCki6q_DkC", "zYLM7Y9cAGgC", "u5HHmVD_uO8C",
  "Tyk-4Ss8FVUC", "eQOLeE2rZwMC", "IjCSPb-OGe4C", "2osOgNQ5qMEC",
  "L_l9e5I586QC", "hqOjcs7Dif8C", "LkGwnXOMwfcC", "WF5omc3nYNoC",
  "__bU50VfleQC", "roLk4NBRz8UC", "_FxGoFyzp5QC", "D_tqNUsBuKoC",
  "tHtfpZlB6tUC", "c1e4I3QdEKYC"
)
ptypes <- c(
  "MSM/Drugs", "MSM/Drugs", "MSM/Drugs", "HPM",
  "Inequality", "Inequality", "HPM", "MSM/Drugs",
  "MSM/Drugs", "Inequality", "MSM/Drugs", "MSM/Drugs",
  "Inequality", "Inequality", "HPM", "HPM",
  "DP", "Inequality", "Inequality", "Inequality",
  "HPM", "HPM"
)
pubtypes <- data.frame(pubid = pubid, pubtype = ptypes)

## Recode the short topic codes into an ordered factor with descriptive labels
pubtypes[["pubtype"]] <- ordered(
  pubtypes[["pubtype"]],
  levels = c("DP", "Inequality", "HPM", "MSM/Drugs"),
  labels = c(
    "Digital Phenotyping",
    "Health Inequalities",
    "Health Policy and Management",
    "MSM / HIV / Drugs"
  )
)

## Attach the topic to the publication table. merge() with its default
## all = FALSE keeps only the pubids present in both tables.
mkpubs <- merge(x = mk$pubs, y = pubtypes, by = "pubid")
## The publication table and the citation history both carry a year column;
## rename the publication one to "pubyear" before the two are merged.
names(mkpubs)[grepl(pattern = "year", x = names(mkpubs))] <- "pubyear"

## One row per article and citation year, with the article's publication
## year and topic attached
mkcites <- merge(
  x = mk$cites,
  y = mkpubs %>% select(pubid, pubyear, pubtype),
  by = "pubid"
)

## Age of the article in the citation year; citation years that precede the
## recorded publication year count as age 0
mkcites$delta <- with(
  mkcites,
  ifelse(year >= pubyear, year - pubyear, 0)
)
## Get cumulative sum -- remove NAs
## Running citation total per article in chronological order. Written as a
## plain assignment because the compound pipe `%<>%` belongs to magrittr,
## which this script never attaches. ungroup() keeps the per-article
## grouping from leaking into later steps.
mkcites <- mkcites %>%
  group_by(pubid) %>%
  arrange(year) %>%
  mutate(ccite = cumsum(cites)) %>%
  ungroup()

## Drop rows whose publication year is unknown
mkcites <- mkcites[!is.na(mkcites$pubyear), ]
## If delta doesn't start at zero, add it in.
## Every article whose smallest delta is not 0 gets one extra row at its
## publication year with zero citations, so each trajectory starts at age 0.
## The rows are built first and bound in a single rbind() instead of
## re-binding the whole table once per article.
mkcites <- as.data.frame(mkcites)
origin_rows <- lapply(as.character(unique(mkcites$pubid)), function(pid) {
  rows <- mkcites[mkcites$pubid == pid, ]
  if (min(rows$delta, na.rm = TRUE) == 0) {
    return(NULL)
  }
  ## Can't just use c() below since pubid is a string
  data.frame(pubid = pid, year = rows$pubyear[1], cites = 0,
             pubyear = rows$pubyear[1], pubtype = rows$pubtype[1],
             delta = 0, ccite = 0)
})
origin_rows <- Filter(Negate(is.null), origin_rows)

## Bind as plain data frames, then convert back to a tibble.
## as_tibble() replaces the deprecated as_data_frame().
mkcites <- as_tibble(do.call(rbind, c(list(mkcites), origin_rows)))
## Feature image: cumulative citations of every article by calendar year,
## in grayscale, with a faint linear trend line underneath
p0 <- ggplot(data = mkcites, mapping = aes(year, ccite)) +
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    color = "gray60", size = 1.5, alpha = 0.15
  ) +
  geom_point(
    mapping = aes(group = pubid),
    size = 1, alpha = 0.75, color = "grey30"
  ) +
  geom_line(
    mapping = aes(group = pubid),
    size = 0.75, alpha = 0.75, color = "grey30"
  ) +
  labs(title = "Citation history over time") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic()

ggsave(
  filename = "./feature_img.jpg", plot = p0,
  width = 7, height = 4, scale = 1.2
)
## Cumulative citations of each article against the article's age (years
## since publication), colored by article type, over a gray linear trend line
p1 <- ggplot(
  data = mkcites,
  mapping = aes(x = delta, y = ccite, color = pubtype)
) +
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    color = "gray60", size = 1.5, alpha = 0.5
  ) +
  geom_point(mapping = aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(mapping = aes(group = pubid), size = 0.75, alpha = 0.75) +
  labs(title = "Citation trajectory of my articles") +
  scale_color_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(name = "Years from publication", expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_shift.jpg", plot = p1,
  width = 7, height = 4, scale = 1.2
)
## Cumulative citations of each article by calendar year, colored by
## article type, over a gray linear trend line
p2 <- ggplot(
  data = mkcites,
  mapping = aes(x = year, y = ccite, color = pubtype)
) +
  geom_line(
    stat = "smooth", method = "lm", se = FALSE,
    color = "gray60", size = 1.5, alpha = 0.85
  ) +
  geom_point(mapping = aes(group = pubid), size = 1, alpha = 0.75) +
  geom_line(mapping = aes(group = pubid), size = 0.75, alpha = 0.75) +
  labs(title = "Citation history over time") +
  scale_color_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_traj.jpg", plot = p2,
  width = 7, height = 4, scale = 1.2
)
## Same data as p2, but the individual articles are faded to light gray and
## the colored lines are linear fits, one per article type
p3 <- ggplot(data = mkcites, mapping = aes(year, ccite)) +
  geom_point(
    mapping = aes(group = pubid),
    color = "grey75", size = 1, alpha = 0.25
  ) +
  geom_line(
    mapping = aes(group = pubid),
    color = "grey75", size = 0.75, alpha = 0.25
  ) +
  geom_line(
    mapping = aes(color = pubtype),
    stat = "smooth", method = "lm", se = FALSE,
    size = 1.5, alpha = 0.9
  ) +
  labs(title = "Citation history, fitted by type, over time") +
  scale_color_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(name = "", expand = c(0, 0.07)) +
  ## NOTE(review): scale limits discard observations outside 0-25 before the
  ## fit is computed; if only a zoom is wanted, coord_cartesian(ylim = ...)
  ## would keep them in the fit -- confirm which is intended.
  scale_y_continuous(
    name = "Citations", expand = c(0, 0.15), limits = c(0, 25)
  ) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_lm.jpg", plot = p3,
  width = 7, height = 4, scale = 1.2
)
## Same data as p1 (x axis is the article's age), but the individual articles
## are faded to light gray and the colored lines are linear fits, one per
## article type
p4 <- ggplot(
  data = mkcites,
  mapping = aes(x = delta, y = ccite, color = pubtype)
) +
  geom_point(
    mapping = aes(group = pubid),
    color = "grey75", size = 1, alpha = 0.25
  ) +
  geom_line(
    mapping = aes(group = pubid),
    color = "grey75", size = 0.75, alpha = 0.25
  ) +
  geom_line(
    mapping = aes(color = pubtype),
    stat = "smooth", method = "lm", se = FALSE,
    size = 1.5, alpha = 0.9
  ) +
  labs(title = "Citation history, fitted by type, over article's age") +
  scale_color_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(name = "Years from publication", expand = c(0, 0.02)) +
  scale_y_continuous(name = "Citations", expand = c(0, 0.15)) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_traj_shift_lm.jpg", plot = p4,
  width = 7, height = 4, scale = 1.2
)
## Reshape dataframe for stacked barcharts
## mkstack: citations received in each calendar year, per article type.
mkstack <- mkcites %>%
  group_by(year, pubtype) %>%
  summarize(total = sum(cites)) %>%
  ungroup()

## mkstackc: running total of those yearly counts within each article type.
## Summing the per-article `ccite` values by year would only count articles
## that happen to have a row in that year, so an article with a gap in its
## history would drop out of the "cumulative" bar; accumulating the yearly
## totals cannot decrease. The two agree whenever every article has a row
## for every year.
mkstackc <- mkstack %>%
  group_by(pubtype) %>%
  arrange(year) %>%
  mutate(total = cumsum(total)) %>%
  ungroup()
## Stacked bars: citations received per year, split by article type.
## The white horizontal rules at every 10 citations act as gridlines drawn
## over the bars.
p5 <- ggplot(
  data = mkstack,
  mapping = aes(x = year, y = total, fill = pubtype)
) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_hline(
    yintercept = seq(0, 65, 10),
    color = "white", alpha = 0.75
  ) +
  labs(title = "Citations per year by article type") +
  scale_fill_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(
    name = "", expand = c(0, 0.02),
    breaks = 2010:2016, labels = 2010:2016
  ) +
  scale_y_continuous(
    name = "Citations", expand = c(0, 0.15),
    breaks = seq(0, 65, 10), labels = seq(0, 65, 10)
  ) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_bar.jpg", plot = p5,
  width = 4, height = 4, scale = 1.2
)
## Stacked bars: the `total` column of mkstackc per year, split by article
## type. The white horizontal rules at every 25 citations act as gridlines
## drawn over the bars.
p6 <- ggplot(
  data = mkstackc,
  mapping = aes(x = year, y = total, fill = pubtype)
) +
  geom_bar(stat = "identity", alpha = 0.9) +
  geom_hline(
    yintercept = seq(0, 200, 25),
    color = "white", alpha = 0.75
  ) +
  labs(title = "Cumulative citations per year by article type") +
  scale_fill_brewer(name = "Article Type", type = "qual", palette = "Dark2") +
  scale_x_continuous(
    name = "", expand = c(0, 0.02),
    breaks = 2010:2016, labels = 2010:2016
  ) +
  scale_y_continuous(
    name = "Cumulative citations", expand = c(0, 0.15),
    breaks = seq(0, 200, 25), labels = seq(0, 200, 25)
  ) +
  theme_classic() +
  ## Legend anchored by its top-left corner in the top-left of the panel
  theme(
    legend.position = c(0, 1),
    legend.justification = c(0, 1)
  )

ggsave(
  filename = "./mk_bar_cum.jpg", plot = p6,
  width = 4, height = 4, scale = 1.2
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment