Skip to content

Instantly share code, notes, and snippets.

@AndrewLJackson
Last active January 17, 2018 21:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AndrewLJackson/646c2ed9edd474e84b85b832f9aa995c to your computer and use it in GitHub Desktop.
Save AndrewLJackson/646c2ed9edd474e84b85b832f9aa995c to your computer and use it in GitHub Desktop.
Code to calculate JDI from google scholar profile
# About: Code to calculate JDI and produce barplot as per
# https://scientistseessquirrel.wordpress.com/2017/08/03/my-journal-life-list/
# Additional code from
# https://www.r-bloggers.com/yet-another-post-on-google-scholar-data-analysis/
# Author: Andrew Jackson
# http://www.tcd.ie/Zoology/research/research/theoretical/andrewjackson.php
# Date: 04-Aug-2017
library(scholar)
library(tidyverse)
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Import data from Google Scholar and clean it per one's own
# requirements
# Google Scholar id of the profile to analyse (this one is Andrew Jackson's).
my_id <- "E0sB7fUAAAAJ"
# Fetch the publications, convert any automatically created factors back to
# characters, and drop the entries that are not journal articles:
#   - blank journals ("") -- for this profile, MixSIAR and a report
#   - pre-prints on arXiv
#   - R packages logged as being on CRAN, identified by what is hopefully a
#     unique string, "Comprehensive"
#   - anything whose journal contains "Preprints"
# Conditions passed to a single filter() call are combined with AND, so a
# publication is kept only if it passes every test.
# Finally, add journal.lwr: a lower-case-only copy of the journal name, so
# that matching journal names is case-insensitive.
my_publications <- get_publications(my_id, cstart = 0) %>%
  mutate_if(is.factor, as.character) %>%
  filter(
    journal != "",
    !grepl("arXiv", journal),
    !grepl("Comprehensive", journal),
    !grepl("Preprints", journal)
  ) %>%
  mutate(journal.lwr = tolower(journal))
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Tally the number of papers per (lower-case) journal name, most frequent
# first.
journal_counts <- dplyr::count(my_publications, journal.lwr, sort = TRUE)
# Turn the journal name into an ordered factor whose levels run from the
# least to the most frequent journal, so that anything using the factor
# order (e.g. a plot axis) follows the counts rather than the alphabet.
journal_counts$journal.lwr <- factor(
  journal_counts$journal.lwr,
  levels = journal_counts$journal.lwr[order(journal_counts$n)],
  ordered = TRUE
)
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Calculate the JDI
# P: total number of publications kept after cleaning
P <- nrow(my_publications)
# J: number of unique journals (one row per journal in the count table)
J <- nrow(journal_counts)
# JDI: unique journals per publication
JDI <- J / P
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Calculate the Journal Entropy Index (JEI) suggested by runner24 in comments on
# https://scientistseessquirrel.wordpress.com/2017/08/03/my-journal-life-list/
# p_i: proportion of all counted papers that appeared in journal i
p_i <- with(journal_counts, n / sum(n))
# JEI: entropy (in bits, hence log2) of those proportions
JEI <- -sum(p_i * log2(p_i))
# JEIs: JEI divided by log2(number of papers), i.e. the entropy obtained when
# every paper is in a different journal, so that 0 <= JEIs <= 1
JEIs <- -JEI / log2(1 / sum(journal_counts$n))
# Report the JDI on its own, then every index rounded to 2 decimal places
print(JDI)
print(round(c(P = P, J = J, JDI = JDI, JEI = JEI, JEIs = JEIs), digits = 2))
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
# Bar chart of the number of papers per journal. geom_col() draws one bar per
# row of the count table and coord_flip() turns the chart on its side so the
# journal names are listed down the vertical axis. The profile is fetched
# only to put the scholar's name in the title alongside the rounded JDI.
my_profile <- get_profile(my_id)
p1 <- ggplot(journal_counts, aes(x = journal.lwr, y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "Number of papers",
    title = paste0(my_profile$name, "'s JDI = ", round(JDI, 2))
  )
print(p1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment