Skip to content

Instantly share code, notes, and snippets.

@zhilongjia
Created September 7, 2015 23:27
Show Gist options
  • Save zhilongjia/6d8a7b22e54820720e12 to your computer and use it in GitHub Desktop.
Save zhilongjia/6d8a7b22e54820720e12 to your computer and use it in GitHub Desktop.
Period (days) needed for a manuscript to be accepted after being received, for journals in the area of bioinformatics
#! /usr/bin/Rscript
# This script computes the period (in days) needed for a manuscript to be
# accepted after it is received, for journals in the area of bioinformatics.
# ref: http://www.r-bloggers.com/bioinformatics-journals-time-from-submission-to-acceptance-revisited/
################################################################################
# Journals to analyse; each entry is used verbatim as the PubMed journal name
# and (with spaces replaced by underscores) as the local XML file name.
titles <- c(
  "Bioinformatics",
  "BMC Bioinformatics",
  "BMC Genomics",
  "PLoS Computational Biology"
)
################################################################################
# Get XML files from pubmed
# Download every PubMed record for one journal (publication date 2013/01/01
# onwards) and save the result as "<title with spaces as underscores>.xml" in
# the current working directory.
#
# title: journal name, used with the [JOUR] field tag in the PubMed query.
# Returns the value of saveXML(), i.e. the result of writing the output file.
getJournalXML <- function(title) {
  library(rentrez)
  print(title)
  term <- paste0(title, "[JOUR]")
  term <- paste(term, 'AND ("2013/01/01"[Date - Publication] : "3000"[Date - Publication])')
  # NOTE(review): usehistory / WebEnv / QueryKey follow the rentrez interface
  # this script was written against; confirm against the installed version.
  e <- entrez_search("pubmed", term, usehistory = "y")
  f <- entrez_fetch("pubmed", WebEnv = e$WebEnv, query_key = e$QueryKey,
                    rettype = "xml", retmax = e$count)
  # The XML package is not attached at this point in the script (only rentrez
  # is), so its functions are called through the namespace explicitly.
  d <- XML::xmlTreeParse(f, useInternalNodes = TRUE)
  outfile <- paste0(gsub(" ", "_", title), ".xml")
  XML::saveXML(XML::xmlRoot(d), outfile)
}
# Fetch each journal in turn; every call writes one XML file into the
# current working directory.
sapply(titles, getJournalXML)
################################################################################
# period
# Compute the number of days between the "received" and "accepted" dates for
# every article in a PubMed XML file.
#
# xmlfile: path to an XML file containing PubmedData/History elements.
# Returns an unnamed numeric vector with one entry (accepted - received, in
# days) per History element for which BOTH dates are present and parse as
# valid calendar dates; all other records are dropped.
xml2day <- function(xmlfile) {
  library(XML)
  journal_xml <- xmlParse(xmlfile)
  histories <- getNodeSet(journal_xml, "//PubmedData/History")

  # First value of <field> under the PubMedPubDate with the given PubStatus,
  # or NA when the node has no such element. Taking only the first match keeps
  # the result scalar even if a record lists the same status more than once.
  first_value <- function(node, status, field) {
    path <- sprintf("./PubMedPubDate[@PubStatus='%s']/%s", status, field)
    values <- xpathSApply(node, path, xmlValue)
    if (length(values) == 0L) {
      return(NA_character_)
    }
    trimws(as.character(values[[1L]]))
  }

  # Date vector (one element per History node) for the given PubStatus.
  # Incomplete or invalid dates become NA instead of raising an error.
  status_dates <- function(status) {
    parts <- lapply(c("Year", "Month", "Day"), function(field) {
      vapply(histories, first_value, character(1),
             status = status, field = field)
    })
    # strptime-style parsing accepts months/days without a leading zero, so no
    # manual zero-padding is needed.
    as.Date(paste(parts[[1L]], parts[[2L]], parts[[3L]], sep = "-"),
            format = "%Y-%m-%d")
  }

  received <- status_dates("received")
  accepted <- status_dates("accepted")

  # Keep only records where both complete dates are available.
  keep <- !is.na(received) & !is.na(accepted)
  as.numeric(difftime(accepted[keep], received[keep], units = "days"))
}
# Locate the XML file saved for each journal (same naming rule as used when
# the files were written: spaces -> underscores, ".xml" suffix).
xml_files <- paste0(gsub(" ", "_", titles), ".xml")
# One numeric vector of day counts per journal, as a list named by journal
# title. Built with lapply so the result is always a list, regardless of how
# many durations each journal yields, and so an empty `titles` is handled.
journal_days <- setNames(lapply(xml_files, xml2day), titles)
################################################################################
# Visualization
# One notched box per journal; outliers are not drawn.
boxplot(journal_days, notch = TRUE, outline = FALSE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment