-
-
Save freshbiostats/2de46d0c7a59cbf54ad0 to your computer and use it in GitHub Desktop.
Analysis of PubMed search results using R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RISmed) | |
library(ggplot2) | |
# ---- NGS publications per year ----------------------------------------------
# Searches PubMed for sequencing-related articles, counts the hits per year,
# and normalises them by the per-year count returned for an empty query.
# Writes the result to "NGS_publications_per_year.txt" (tab-separated).

# Year range shared by the NGS search and the per-year total counts.
first_year <- 1980
last_year <- 2013
# Upper bound on the number of records requested from PubMed.
max_records <- 30000

query <- "(exome OR whole OR deep OR high-throughput OR (next AND generation) OR (massively AND parallel)) AND sequencing"
ngs_search <- EUtilsSummary(
  query,
  type = "esearch",
  db = "pubmed",
  mindate = first_year,
  maxdate = last_year,
  retmax = max_records
)

# Report the hit count explicitly so it is also shown when the script is
# run through source(), where top-level values are not auto-printed.
ngs_hits <- QueryCount(ngs_search)
print(ngs_hits)
if (ngs_hits > max_records) {
  # Only `retmax` records are requested, so the per-year counts below would
  # be computed from a subset of the matching articles.
  warning(
    "Query matched ", ngs_hits, " records but only ", max_records,
    " were requested; per-year counts will be incomplete.",
    call. = FALSE
  )
}

ngs_records <- EUtilsGet(ngs_search)

# Newer RISmed releases reportedly no longer provide Year(); prefer
# YearPubmed() when it is available and fall back to Year() otherwise.
# `if` evaluates only the chosen branch, so the missing name is never touched.
extract_year <- if (exists("YearPubmed", mode = "function")) YearPubmed else Year
years <- extract_year(ngs_records)
ngs_pubs_count <- as.data.frame(table(years))

# One request per year; the result is a plain vector aligned with `year`
# (previously `total` was indexed by the year itself, leaving ~2000 NA slots).
year <- first_year:last_year
total <- vapply(
  year,
  function(y) {
    peryear <- EUtilsSummary(
      "",
      type = "esearch",
      db = "pubmed",
      mindate = y,
      maxdate = y
    )
    QueryCount(peryear)
  },
  numeric(1)
)

total_pubs_count <- data.frame(year = year, Total_publications = total)
names(ngs_pubs_count) <- c("year", "NGS_publications")

# Years with no NGS hits are absent from `ngs_pubs_count` and therefore
# dropped by this inner join.
pubs_year <- merge(ngs_pubs_count, total_pubs_count, by = "year")

# NGS articles per 100000 articles counted for the same year.
pubs_year$NGS_publications_normalized <-
  pubs_year$NGS_publications * 100000 / pubs_year$Total_publications

write.table(
  pubs_year,
  "NGS_publications_per_year.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
# ---- NGS publications per journal -------------------------------------------
# Counts NGS records per journal (MedlineTA abbreviation), keeps the journals
# with the most records, queries PubMed for each journal's count over
# 1980-2013, and writes "NGS_publications_per_journal.txt" (tab-separated).

# Number of top journals to keep.
top_n <- 25

journal <- MedlineTA(ngs_records)
ngs_journal_count <- as.data.frame(table(journal))

# Sort by descending count. head() caps at the rows that actually exist,
# whereas indexing with [1:25, ] padded the table with all-NA rows whenever
# fewer than 25 journals were present.
ngs_journal_ranked <- ngs_journal_count[order(-ngs_journal_count[, 2]), ]
ngs_journal_count_top25 <- head(ngs_journal_ranked, top_n)

# The "[jo]" suffix is appended to each journal name to form the search term.
journal_names <- paste0(ngs_journal_count_top25$journal, "[jo]")

# One request per journal. vapply() over a character vector names the result
# by search term, matching the names the original loop assigned.
total_journal <- vapply(
  journal_names,
  function(term) {
    perjournal <- EUtilsSummary(
      term,
      type = "esearch",
      db = "pubmed",
      mindate = 1980,
      maxdate = 2013
    )
    QueryCount(perjournal)
  },
  numeric(1)
)

journal_ngs_total <- cbind(ngs_journal_count_top25, total_journal)
names(journal_ngs_total) <- c("journal", "NGS_publications", "Total_publications")

# Fraction of each journal's counted articles that matched the NGS query.
journal_ngs_total$NGS_publications_normalized <-
  journal_ngs_total$NGS_publications / journal_ngs_total$Total_publications

write.table(
  journal_ngs_total,
  "NGS_publications_per_journal.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
# ---- Plots ------------------------------------------------------------------
# Reads back the two tab-separated result files and draws three plots:
# the normalised NGS count per year, and the raw and normalised NGS counts
# per journal.

# The files were written with quote = FALSE, so quoting and comment handling
# are disabled here; otherwise a journal title containing an apostrophe or
# '#' would corrupt the parse.
pubs_per_year <- read.table(
  "NGS_publications_per_year.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
)
pubs_per_journal <- read.table(
  "NGS_publications_per_journal.txt",
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
)

# Plots are print()ed explicitly so they are also rendered when the script
# is run via source(), where top-level values are not auto-printed.

# NOTE(review): recent ggplot2 releases deprecate `size` for lines in favour
# of `linewidth`; `size` is kept for compatibility with older versions -
# confirm against the installed ggplot2.
print(
  ggplot(pubs_per_year, aes(year, NGS_publications_normalized)) +
    geom_line(colour = "blue", size = 2) +
    xlab("Year") +
    ylab("NGS/100000 articles") +
    ggtitle("NGS PubMed articles")
)

# Raw NGS article count per journal, as horizontal bars.
print(
  ggplot(pubs_per_journal, aes(journal, NGS_publications, fill = journal)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    theme(legend.position = "none")
)

# NGS article count per journal relative to the journal's total count.
print(
  ggplot(
    pubs_per_journal,
    aes(journal, NGS_publications_normalized, fill = journal)
  ) +
    geom_bar(stat = "identity") +
    coord_flip() +
    theme(legend.position = "none")
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great tutorial - I have noted that the `Year` function doesn't appear to work. You now have to use `years <- YearPubmed(ngs_records)` instead.