Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dreidpath
Created December 3, 2016 01:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dreidpath/94b120eb3f1540d30d7948419a06e44c to your computer and use it in GitHub Desktop.
Save dreidpath/94b120eb3f1540d30d7948419a06e44c to your computer and use it in GitHub Desktop.
An R file for downloading PubMed database entry details for five medical journals' clinical case reports
library(RISmed)
library(data.table)
## Helper Functions
extractMesh <- function(mesh_frame, sep = '#'){
  # Collapse the MeSH headings of a single article into one lower-case string.
  #
  # Args:
  #   mesh_frame: One element of the list returned by Mesh(): an object with a
  #               "Heading" component (e.g. a data frame of MeSH headings).
  #               A bare NA is also accepted -- presumably what Mesh() yields
  #               for a record that has no MeSH terms (TODO confirm).
  #   sep:        Separator placed between the headings in the result.
  #
  # Returns:
  #   A length-one character vector holding the unique, lower-cased headings
  #   joined by 'sep', or NA_character_ when 'mesh_frame' is a bare NA.

  # A bare NA carries no headings: return a real missing value instead of
  # letting paste() turn it into the literal string "NA".
  if (is.atomic(mesh_frame) && length(mesh_frame) == 1L && is.na(mesh_frame)) {
    return(NA_character_)
  }
  # Lower-case BEFORE de-duplicating so that headings differing only in case
  # (e.g. "Male" and "male") collapse into a single entry.
  headings <- tolower(as.character(unlist(mesh_frame["Heading"])))
  paste(unique(headings), collapse = sep)
}
## PubMed queries selecting the case reports of each of the five journals.
## Every query has the same shape: a journal clause, the "Case Reports"
## publication type, and a publication-date window starting on 2000/09/05.
# NOTE(review): three journals are searched up to 2016/09/04 and two up to
# 2016/09/02, and one query spells the date tag "PDAT" rather than "PDat".
# Both are reproduced exactly as found -- confirm whether they are intended.
search_topics <- local({
  journal_clause <- c(
    '"N Engl J Med"[Journal]',
    '"jama"[Journal]',
    '"Lancet"[Journal]',
    '"Br Med J"[Journal] OR "Br Med J (Clin Res Ed)"[Journal] OR "BMJ"[Journal]',
    '"Ann Intern Med"[Journal]'
  )
  date_tag <- c("PDat", "PDAT", "PDat", "PDat", "PDat")
  last_day <- c("2016/09/04", "2016/09/04", "2016/09/04",
                "2016/09/02", "2016/09/02")
  # One query per journal, in the order listed above
  sprintf(
    '(%s) AND (Case Reports[ptyp] AND "2000/09/05"[%s] : "%s"[%s])',
    journal_clause, date_tag, last_day, date_tag
  )
})
# Download the article records for every journal query and keep one data frame
# per query. Each data frame has the columns pmid, year, title, abstract, issn
# and mesh (the record's MeSH headings collapsed to one string by extractMesh()).

# Upper bound on the number of records requested per query
max_records <- 2000
# Preallocate one slot per query
all_data <- vector("list", length(search_topics))
# seq_along() (unlike 1:length()) iterates zero times for an empty query vector
for(i in seq_along(search_topics)){
  search_query <- EUtilsSummary(search_topics[i], retmax = max_records)
  # The request is capped at max_records; report queries that match more
  # records so that a truncated download does not go unnoticed.
  n_found <- QueryCount(search_query)
  if (isTRUE(n_found > max_records)) {
    warning("Query ", i, " matches ", n_found, " records but only ",
            max_records, " are requested; results are truncated",
            call. = FALSE)
  }
  records <- EUtilsGet(search_query)
  tmp_data <- data.frame('pmid'=PMID(records),
                         'year'=YearPubmed(records),
                         'title'=tolower(ArticleTitle(records)),
                         'abstract'=tolower(AbstractText(records)),
                         'issn'=ISSN(records),
                         stringsAsFactors = FALSE)
  # Store the mesh headings as a string in the dataframe; vapply() guarantees
  # exactly one string per record, whatever Mesh() returns for it
  tmp_data$mesh <- vapply(Mesh(records), extractMesh, character(1),
                          USE.NAMES = FALSE)
  # Store the i-th query's data in its slot of the list of dataframes
  all_data[[i]] <- tmp_data
}
# Stack the per-journal data frames into a single table
all_data <- rbindlist(all_data)
# Create a new set of variables to sort out age and sex of the clinical cases.
# The mesh column is a '#'-separated string of lower-case headings, so each
# pattern accepts the term either at the very start of the string or directly
# after a '#'. The leading anchor is what stops "male" from matching inside
# "female". One alternation replaces the former pair of grepl() scans.
# NOTE(review): the patterns are not anchored at the end, so any heading that
# merely BEGINS with the term also sets the flag (e.g. "human" matches
# "humans", "child" matches "child, preschool") -- confirm this is intended
# for every flag.
# does a heading starting with "male" appear in the MESH
all_data$sex_m <- grepl("(^|#)male", all_data$mesh)
# does a heading starting with "female" appear in the MESH
all_data$sex_f <- grepl("(^|#)female", all_data$mesh)
# does a heading starting with "child" appear in the MESH
all_data$age_child <- grepl("(^|#)child", all_data$mesh)
# does a heading starting with "adult" appear in the MESH
all_data$age_adult <- grepl("(^|#)adult", all_data$mesh)
# does a heading starting with "human" appear in the MESH
all_data$human <- grepl("(^|#)human", all_data$mesh)
# Label every article with its journal, derived from the ISSN.
# Each journal is listed under two ISSNs; any ISSN not in the table (or a
# missing ISSN) leaves the journal as NA.
all_data$journal <- local({
  issn_to_journal <- c(
    "0003-4819" = "AnnInternMed",
    "1539-3704" = "AnnInternMed",
    "1756-1833" = "BMJ",
    "0959-8138" = "BMJ",
    "1538-3598" = "JAMA",
    "0098-7484" = "JAMA",
    "1474-547X" = "Lancet",
    "0140-6736" = "Lancet",
    "1533-4406" = "NEJM",
    "0028-4793" = "NEJM"
  )
  # match() gives the table position of each article's ISSN (NA when absent)
  table_row <- match(all_data$issn, names(issn_to_journal))
  unname(issn_to_journal[table_row])
})
# Create a new variable for articles tagged with exactly one sex:
#   0  = male only, 1 = female only,
#   NA = both sexes tagged or neither tagged.
# Uses plain logical masks instead of comparisons against T / F, which are
# ordinary variables that can be reassigned.
all_data$gender <- local({
  only_male <- all_data$sex_m & !all_data$sex_f
  only_female <- all_data$sex_f & !all_data$sex_m
  code <- rep(NA_real_, nrow(all_data))
  code[only_male] <- 0
  code[only_female] <- 1
  code
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment