Last active
January 17, 2018 21:46
-
-
Save AndrewLJackson/646c2ed9edd474e84b85b832f9aa995c to your computer and use it in GitHub Desktop.
Code to calculate JDI from google scholar profile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# About: Code to calculate JDI and produce barplot as per
# https://scientistseessquirrel.wordpress.com/2017/08/03/my-journal-life-list/
# Additional code from
# https://www.r-bloggers.com/yet-another-post-on-google-scholar-data-analysis/
# Author: Andrew Jackson
# http://www.tcd.ie/Zoology/research/research/theoretical/andrewjackson.php
# Date: 04-Aug-2017
library(scholar)
library(tidyverse)

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Import data from Google Scholar and clean it per one's own
# requirements

# Your Google Scholar id. This one is for me, Andrew Jackson.
my_id <- "E0sB7fUAAAAJ"

# Fetch every publication for `my_id`, convert any automatically created
# factor columns back to character, and then drop the records that should
# not count as journal articles:
#   * blank journal names (for me: MixSIAR and a report I wrote),
#   * pre-prints on arXiv,
#   * R packages logged as being on CRAN, identified by what I hope is a
#     unique string, "Comprehensive",
#   * anything logged under "Preprints".
# NOTE: the grepl() patterns are case-sensitive on purpose so that they match
# the original journal strings exactly as Google Scholar reports them.
my_publications <- get_publications(my_id, cstart = 0) %>%
  mutate(across(where(is.factor), as.character)) %>%
  filter(journal != "") %>%
  filter(!grepl("arXiv", journal)) %>%
  filter(!grepl("Comprehensive", journal)) %>%
  filter(!grepl("Preprints", journal))

# Add a lower-case copy of the journal name so that differences in
# capitalisation do not split one journal into several when matching.
my_publications$journal.lwr <- tolower(my_publications$journal)
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Tally the papers per (lower-case) journal, most frequent first, then turn
# the journal name into an ordered factor whose levels run from the least to
# the most frequent journal. That level order is what positions the bars in
# the plot further down.
journal_counts <- my_publications %>%
  dplyr::count(journal.lwr, sort = TRUE) %>%
  mutate(
    journal.lwr = factor(
      journal.lwr,
      levels = journal.lwr[order(n)],
      ordered = TRUE
    )
  )
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Journal Diversity Index (JDI): distinct journals per publication.

# P: total number of publications retained after cleaning
P <- nrow(my_publications)

# J: number of distinct journals (matched on the lower-case name)
J <- length(unique(my_publications$journal.lwr))

# JDI is the ratio of the two
JDI <- J / P
print(JDI)
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Calculate the Journal Entropy Index (JEI) suggested by runner24 in comments on
# https://scientistseessquirrel.wordpress.com/2017/08/03/my-journal-life-list/

# p_i: proportion of all papers that appeared in journal i
p_i <- journal_counts$n / sum(journal_counts$n)

# JEI: Shannon entropy (in bits) of the distribution of papers over journals
JEI <- -sum(p_i * log2(p_i))

# JEIs: JEI scaled to 0 <= JEIs <= 1 by dividing by log2(P), the entropy
# obtained when every one of the P papers is in a different journal.
# With a single paper log2(P) is 0 and the ratio would be 0 / 0 = NaN, so the
# scaled index is defined as 0 (no diversity) in that case.
JEIs <- if (P > 1) JEI / log2(P) else 0

# Summary of all the indices, rounded to two decimal places
print(round(c(P = P, J = J, JDI = JDI, JEI = JEI, JEIs = JEIs), 2))
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Draw the per-journal counts as a bar chart with geom_col() and flip the
# coordinates so the bars run horizontally. The scholar profile is fetched
# only to put the author's name in the title.
my_profile <- get_profile(my_id)

p1 <- ggplot(journal_counts, aes(journal.lwr, n)) +
  geom_col() +
  labs(
    x = "",
    y = "Number of papers",
    title = paste0(my_profile$name, "'s JDI = ", round(JDI, 2))
  ) +
  coord_flip()

print(p1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment