Skip to content

Instantly share code, notes, and snippets.

@freshbiostats
Last active September 13, 2021 10:22
Show Gist options
  • Star 9 You must be signed in to star a gist
  • Fork 9 You must be signed in to fork a gist
  • Save freshbiostats/2de46d0c7a59cbf54ad0 to your computer and use it in GitHub Desktop.
Save freshbiostats/2de46d0c7a59cbf54ad0 to your computer and use it in GitHub Desktop.
Analysis of PubMed search results using R
library(RISmed)
library(ggplot2)
# ---- NGS articles in PubMed, 1980-2013 ----
# Build the PubMed query for sequencing-technology articles, download the
# matching records and tabulate how many were retrieved per year.
query <- "(exome OR whole OR deep OR high-throughput OR (next AND generation) OR (massively AND parallel)) AND sequencing"
ngs_retmax <- 30000
ngs_search <- EUtilsSummary(
  query,
  type = "esearch", db = "pubmed",
  mindate = 1980, maxdate = 2013, retmax = ngs_retmax
)
# Left as a bare top-level expression so the hit count is shown.
QueryCount(ngs_search)
# The search is capped at `retmax` ids; if PubMed reports more hits than
# that, the per-year and per-journal counts below would be under-estimates.
if (QueryCount(ngs_search) > ngs_retmax) {
  warning(
    "PubMed reports ", QueryCount(ngs_search), " hits but retmax is ",
    ngs_retmax, "; counts will be based on a truncated record set.",
    call. = FALSE
  )
}
ngs_records <- EUtilsGet(ngs_search)
# `Year()` is reported not to work with newer RISmed releases;
# `YearPubmed()` is the accessor to use instead.
# NOTE(review): presumably this is the year the record entered PubMed --
# confirm it is the date you want to bin on.
years <- YearPubmed(ngs_records)
ngs_pubs_count <- as.data.frame(table(years))
# ---- Total PubMed output per year and normalised NGS counts ----
# For every year in the study window, ask PubMed how many records match an
# empty query restricted to that year, then express the NGS counts per
# 100000 PubMed articles and save the table.
year <- 1980:2013
# One count per year, in the same order as `year`. (Indexing a vector by
# the calendar year itself would create ~2000 unused NA slots.)
total <- vapply(
  year,
  function(y) {
    peryear <- EUtilsSummary(
      "",
      type = "esearch", db = "pubmed", mindate = y, maxdate = y
    )
    as.numeric(QueryCount(peryear))
  },
  numeric(1)
)
total_pubs_count <- data.frame(year = year, Total_publications = total)
names(ngs_pubs_count) <- c("year", "NGS_publications")
# `table()` yields the year as a factor; convert via character so both
# merge keys are plain integers rather than relying on implicit coercion.
ngs_pubs_count$year <- as.integer(as.character(ngs_pubs_count$year))
pubs_year <- merge(ngs_pubs_count, total_pubs_count, by = "year")
pubs_year$NGS_publications_normalized <-
  pubs_year$NGS_publications * 100000 / pubs_year$Total_publications
write.table(
  pubs_year,
  "NGS_publications_per_year.txt",
  quote = FALSE, sep = "\t", row.names = FALSE
)
# ---- NGS articles per journal (top 25) ----
# Tabulate the retrieved NGS records by journal abbreviation, keep the 25
# journals with the most records, look up each journal's total PubMed
# output for 1980-2013 and save raw and normalised counts.
journal <- MedlineTA(ngs_records)
ngs_journal_count <- as.data.frame(table(journal))
ngs_journal_ranked <- ngs_journal_count[order(-ngs_journal_count$Freq), ]
# `head()` returns at most 25 rows; `[1:25, ]` would pad with all-NA rows
# (and trigger "NA[jo]" queries) when fewer than 25 journals are present.
ngs_journal_count_top25 <- head(ngs_journal_ranked, 25)
# "[jo]" restricts the PubMed search term to the journal field.
journal_names <- paste0(ngs_journal_count_top25$journal, "[jo]")
# One query per journal; the result is named by the query string.
total_journal <- vapply(
  journal_names,
  function(term) {
    perjournal <- EUtilsSummary(
      term,
      type = "esearch", db = "pubmed", mindate = 1980, maxdate = 2013
    )
    as.numeric(QueryCount(perjournal))
  },
  numeric(1)
)
journal_ngs_total <- data.frame(
  journal = ngs_journal_count_top25$journal,
  NGS_publications = ngs_journal_count_top25$Freq,
  Total_publications = unname(total_journal)
)
journal_ngs_total$NGS_publications_normalized <-
  journal_ngs_total$NGS_publications / journal_ngs_total$Total_publications
write.table(
  journal_ngs_total,
  "NGS_publications_per_journal.txt",
  quote = FALSE, sep = "\t", row.names = FALSE
)
# ---- Plots ----
# Reload the two tab-separated tables written earlier and draw three
# figures: the normalised NGS trend per year, and raw / normalised NGS
# counts per journal.
#
# The files were written without quoting, so quote and comment processing
# are switched off here; otherwise a `'`, `"` or `#` inside a journal name
# would corrupt the parse.
pubs_per_year <- read.table(
  "NGS_publications_per_year.txt",
  header = TRUE, sep = "\t", quote = "", comment.char = ""
)
pubs_per_journal <- read.table(
  "NGS_publications_per_journal.txt",
  header = TRUE, sep = "\t", quote = "", comment.char = ""
)

# Each plot is printed explicitly so it is also drawn when the script is
# run through `source()`, where top-level values are not auto-printed.
ngs_trend_plot <- ggplot(pubs_per_year, aes(year, NGS_publications_normalized)) +
  geom_line(colour = "blue", size = 2) +
  xlab("Year") +
  ylab("NGS/100000 articles") +
  ggtitle("NGS PubMed articles")
print(ngs_trend_plot)

# Horizontal bars, one per journal; the legend would only repeat the axis
# labels, so it is hidden.
journal_count_plot <- ggplot(
  pubs_per_journal,
  aes(journal, NGS_publications, fill = journal)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme(legend.position = "none")
print(journal_count_plot)

journal_share_plot <- ggplot(
  pubs_per_journal,
  aes(journal, NGS_publications_normalized, fill = journal)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  theme(legend.position = "none")
print(journal_share_plot)
@jalapic
Copy link

jalapic commented Oct 4, 2015

Great tutorial - I have noted that the Year function doesn't appear to work. You now have to use years <- YearPubmed(ngs_records) instead.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment