-
-
Save dreidpath/94b120eb3f1540d30d7948419a06e44c to your computer and use it in GitHub Desktop.
An R file for downloading PubMed database entry details for clinical case reports from five medical journals
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RISmed) | |
library(data.table) | |
## Helper Functions
extractMesh <- function(mesh_frame, sep = "#") {
  # Collapse the MeSH headings of one article into a single string.
  #
  # mesh_frame: one element of the list returned by Mesh(); its "Heading"
  #             entry holds the heading names.
  # sep:        separator placed between headings in the result.
  #
  # Returns a length-one character string containing the lower-cased, unique
  # headings joined by 'sep'.
  headings <- as.character(unlist(mesh_frame["Heading"]))
  # Lower-case before de-duplicating so headings that differ only in case
  # are collapsed into one entry.
  headings <- unique(tolower(headings))
  paste(headings, collapse = sep)
}
## Search terms to extract case reports from the five journals.
## One PubMed query per journal, each restricted to the "Case Reports"
## publication type and a publication-date window starting 2000/09/05.
## NOTE(review): the window ends on 2016/09/04 for the first three queries but
## on 2016/09/02 for the last two -- confirm whether this is intentional.
search_topics <- c(
  '("N Engl J Med"[Journal]) AND (Case Reports[ptyp] AND "2000/09/05"[PDat] : "2016/09/04"[PDat])',
  '("jama"[Journal]) AND (Case Reports[ptyp] AND "2000/09/05"[PDAT] : "2016/09/04"[PDAT])',
  '("Lancet"[Journal]) AND (Case Reports[ptyp] AND "2000/09/05"[PDat] : "2016/09/04"[PDat])',
  '("Br Med J"[Journal] OR "Br Med J (Clin Res Ed)"[Journal] OR "BMJ"[Journal]) AND (Case Reports[ptyp] AND "2000/09/05"[PDat] : "2016/09/02"[PDat])',
  '("Ann Intern Med"[Journal]) AND (Case Reports[ptyp] AND "2000/09/05"[PDat] : "2016/09/02"[PDat])'
)
# Pre-allocate a list holding one data frame per journal query
all_data <- vector("list", length(search_topics))
# Loop through each journal and get the article data.
# seq_along() keeps the loop from running at all if search_topics is empty.
for (i in seq_along(search_topics)) {
  # NOTE(review): retmax = 2000 limits how many records are requested per
  # query; a query with more hits than that would come back truncated --
  # confirm the hit counts stay below the limit.
  search_query <- EUtilsSummary(search_topics[i], retmax = 2000)
  records <- EUtilsGet(search_query)
  tmp_data <- data.frame(
    pmid = PMID(records),
    year = YearPubmed(records),
    title = tolower(ArticleTitle(records)),
    abstract = tolower(AbstractText(records)),
    issn = ISSN(records),
    stringsAsFactors = FALSE
  )
  # Store the mesh headings as one '#'-separated string per article.
  # vapply() guarantees exactly one string per record and still yields an
  # (empty) character column when the query returned no records.
  tmp_data$mesh <- vapply(
    Mesh(records), extractMesh, character(1), USE.NAMES = FALSE
  )
  # Store the i-th query's data in the list of data frames
  all_data[[i]] <- tmp_data
}
# Create a single table with the merged data from all queries
all_data <- rbindlist(all_data)
# Create a new set of variables to sort out age and sex of the clinical cases.
# The mesh column is a '#'-separated string of lower-cased headings, so each
# pattern looks for a heading that *begins* with the given word: either at the
# very start of the string or directly after a '#'. The anchor is what keeps
# "female" from being counted as "male".
# NOTE(review): because only the start of a heading is anchored, any heading
# that merely begins with the word also matches (e.g. "human" matches
# "humans") -- confirm this prefix matching is the intended behaviour.
# does a heading starting with "male" appear in the MESH
all_data$sex_m <- grepl("(^|#)male", all_data$mesh)
# does a heading starting with "female" appear in the MESH
all_data$sex_f <- grepl("(^|#)female", all_data$mesh)
# does a heading starting with "child" appear in the MESH
all_data$age_child <- grepl("(^|#)child", all_data$mesh)
# does a heading starting with "adult" appear in the MESH
all_data$age_adult <- grepl("(^|#)adult", all_data$mesh)
# does a heading starting with "human" appear in the MESH
all_data$human <- grepl("(^|#)human", all_data$mesh)
# Create a new variable representing each journal (i.e., based on ISSN).
# Each journal is listed under two ISSNs; both map to the same short label.
issn_to_journal <- c(
  "0003-4819" = "AnnInternMed",
  "1539-3704" = "AnnInternMed",
  "1756-1833" = "BMJ",
  "0959-8138" = "BMJ",
  "1538-3598" = "JAMA",
  "0098-7484" = "JAMA",
  "1474-547X" = "Lancet",
  "0140-6736" = "Lancet",
  "1533-4406" = "NEJM",
  "0028-4793" = "NEJM"
)
# match() returns NA for a missing or unlisted ISSN, so those rows get an NA
# journal, exactly as if no assignment had been made for them.
all_data$journal <- unname(
  issn_to_journal[match(all_data$issn, names(issn_to_journal))]
)
# Create a new variable representing articles with only male or only female
# cases: 0 = male only, 1 = female only, NA = both or neither flag set.
all_data$gender <- NA_real_
all_data$gender[all_data$sex_m & !all_data$sex_f] <- 0
all_data$gender[!all_data$sex_m & all_data$sex_f] <- 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment