Created
September 22, 2018 11:58
-
-
Save ikashnitsky/3133422ef85ff3f3d65be9926d6bd990 to your computer and use it in GitHub Desktop.
Get all SCImago Journal & Country Rank data (script version of sjrdata package)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
# | |
# bibliometrics 2018-09-22 | |
# SJR data -- load and combine all files -- GIST | |
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com | |
# | |
################################################################################ | |
library(tidyverse) | |
library(janitor) | |
library(readxl) | |
# functions to get data --------------------------------------------------- | |
# Build the download URL of the SCImago journal ranking export for `year`.
# `year` is pasted into the query string as-is, so the function is
# vectorised: a vector of years yields a vector of URLs.
journal_url <- function(year) {
  endpoint <- "https://www.scimagojr.com/journalrank.php"
  query <- paste0("?year=", year, "&out=xls")
  paste0(endpoint, query)
}
# Build the download URL of the SCImago country ranking export for `year`.
# `year` is pasted into the query string as-is, so the function is
# vectorised: a vector of years yields a vector of URLs.
country_url <- function(year) {
  endpoint <- "https://www.scimagojr.com/countryrank.php"
  query <- paste0("?year=", year, "&out=xls")
  paste0(endpoint, query)
}
# set last available year -------------------------------------------------
# As of 2018-09-22, the last data year is 2017
# To be updated once a year
last_year <- 2017

# get and tidy the data -- journals ---------------------------------------
years_j <- 1999:last_year

# Download and tidy the journal ranking of one year.
# Returns a data frame with snake_case column names in which the
# year-specific "total docs" column has a year-independent name.
read_journal_year <- function(year) {
  # the export is read as a semicolon-delimited text file; parsing chatter
  # from readr is silenced
  dfi <- suppressMessages(suppressWarnings(
    read_csv2(url(journal_url(year)))
  )) %>% clean_names()
  # The total-docs column is uniquely named in every yearly file (the name
  # embeds the year), which would stop bind_rows() from stacking it into one
  # column. Rename it by name pattern rather than by position, so a change
  # in column order cannot silently rename the wrong column.
  # NOTE(review): assumes clean_names() yields "total_docs_<yyyy>" -- confirm
  names(dfi) <- str_replace(names(dfi), "^(total_docs_)[0-9]{4}$", "\\1year")
  dfi
}

# one data frame per year, named by year, stacked with a "year" id column
df_jr <- years_j %>%
  set_names() %>%
  map(read_journal_year) %>%
  bind_rows(.id = "year")

# consider saving the data!
# save(df_jr, file = "df_jr.rda")
# get and tidy the data -- countries year-by-year -------------------------
years_c <- 1996:last_year

# Download and tidy the country ranking of one year.
# The export is an Excel workbook, so it is first saved to a temporary file
# and then read from disk. Returns a data frame with snake_case column names.
read_country_year <- function(year) {
  path <- tempfile(fileext = ".xlsx")
  # remove the temporary workbook however the function exits
  on.exit(unlink(path), add = TRUE)
  # mode = "wb": the workbook is binary and must not be altered on Windows
  status <- download.file(
    url = country_url(year),
    destfile = path, mode = "wb"
  )
  # download.file() reports failure through a non-zero return code
  if (status != 0) {
    stop("Failed to download country data for ", year, call. = FALSE)
  }
  suppressMessages(suppressWarnings(
    read_xlsx(path = path, sheet = 1)
  )) %>% clean_names()
}

# one data frame per year, named by year, stacked with a "year" id column
df_cr <- years_c %>%
  set_names() %>%
  map(read_country_year) %>%
  bind_rows(.id = "year")

# consider saving the data!
# save(df_cr, file = "df_cr.rda")
# get and tidy the data -- countries all years together --------------------
pathi <- tempfile(fileext = ".xlsx")
# The all-years export is requested by omitting the `year` parameter, so the
# URL is used directly; country_url() always inserts a `year=` value and
# must not be given a full URL.
# mode = "wb": the workbook is binary and must not be altered on Windows
xlsxi <- download.file(
  url = "https://www.scimagojr.com/countryrank.php?out=xls",
  destfile = pathi, mode = "wb"
)
# download.file() reports failure through a non-zero return code
if (xlsxi != 0) {
  stop("Failed to download the all-years country data", call. = FALSE)
}
# the object name embeds the last data year, e.g. df_cr_1996_2017
assign(
  paste0("df_cr_1996_", last_year),
  read_xlsx(pathi) %>% clean_names()
)
# the temporary workbook is no longer needed once it has been read
unlink(pathi)
# consider saving the data!
# save() takes object names, so the dynamic name is passed via `list =`
# save(list = paste0("df_cr_1996_", last_year),
#      file = paste0("df_cr_1996_", last_year, ".rda"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment