Skip to content

Instantly share code, notes, and snippets.

@dgrapov
Last active December 19, 2015 00:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dgrapov/5871064 to your computer and use it in GitHub Desktop.
Save dgrapov/5871064 to your computer and use it in GitHub Desktop.
Check for a keyword in PubMed article titles
# Check PubMed article titles from a given year for a keyword (using partial matching).
library(XML)
library(stringr)
# Get PubMed IDs for articles with a publication date in a given year.
#
# Sends the search term "<year>[PDAT]" to the NCBI E-utilities `esearch`
# endpoint and returns the entries of the response's IdList.
#
# Args:
#   year: publication year to search for (a single number or string).
#   max:  maximum number of results to return (sent as RetMax).
#
# Returns:
#   The unlisted IdList of the response (one element per ID), or NULL if the
#   response contains no IDs.
getPubMedIds <- function(year = 2013, max = 100) {
  if (length(year) != 1L || is.na(year) || length(max) != 1L || is.na(max)) {
    stop("'year' and 'max' must each be a single non-missing value",
         call. = FALSE)
  }
  # format(scientific = FALSE) keeps large values such as 1e5 from being
  # pasted into the URL as "1e+05".
  year_txt <- format(year, scientific = FALSE, trim = TRUE)
  max_txt <- format(max, scientific = FALSE, trim = TRUE)
  # Percent-encode the square brackets of the [PDAT] field tag.
  term <- URLencode(paste0(year_txt, "[PDAT]"), reserved = TRUE)
  url <- paste0(
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
    "?db=pubmed&term=", term, "&RetMax=", max_txt
  )
  lines <- readLines(url, warn = FALSE)
  doc <- xmlParse(paste(lines, collapse = "\n"), asText = TRUE)
  # [[ ]] avoids the partial name matching that `$` performs on lists.
  unlist(xmlToList(doc)[["IdList"]])
}
# Get the article title for each PubMed ID.
#
# Issues one NCBI E-utilities `efetch` request per ID and extracts the
# ArticleTitle element of the first PubmedArticle record in the response.
# A text progress bar is advanced after each request.
#
# Args:
#   id: vector of PubMed IDs.
#
# Returns:
#   A character vector of titles in the order of `id`. IDs whose response has
#   no ArticleTitle contribute no element, so the result can be shorter than
#   `id`. Empty input returns character(0) without any request.
getArticleTitle <- function(id) {
  if (length(id) == 0L) {
    return(character(0))
  }
  # One progress bar for the whole run, closed even if a request fails.
  pb <- txtProgressBar(min = 0, max = length(id), style = 3)
  on.exit(close(pb), add = TRUE)
  titles <- lapply(seq_along(id), function(i) {
    url <- paste0(
      "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi",
      "?db=pubmed&id=", id[i], "&retmode=XML&rettype=abstract"
    )
    lines <- readLines(url, warn = FALSE)
    doc <- xmlParse(paste(lines, collapse = "\n"), asText = TRUE)
    setTxtProgressBar(pb, i)
    nodes <- getNodeSet(
      doc, "//PubmedArticle/MedlineCitation/Article/ArticleTitle"
    )
    if (length(nodes) == 0L) {
      return(NULL)
    }
    # xmlValue() concatenates the text of nested markup (e.g. <i>, <sub>),
    # so a title always comes back as a single string.
    xmlValue(nodes[[1L]])
  })
  as.character(unlist(titles, use.names = FALSE))
}
# Return the words of the given titles that match a query term.
#
# Each element of `obj` is split on single spaces and every resulting word
# in which `term` matches (via stringr pattern matching, so partial matches
# count) is returned.
#
# Args:
#   obj:  the text to check, as a character vector (e.g. article titles).
#   term: the pattern to look for inside each word.
#
# Returns:
#   An unnamed character vector of matching words, in order of appearance;
#   character(0) when nothing matches or `obj` is empty.
checkGetTerm <- function(obj, term) {
  if (length(obj) == 0L) {
    return(character(0))
  }
  # strsplit() is vectorised, so all titles are split in one call; flattening
  # here always yields a plain vector regardless of matches per title.
  words <- unlist(strsplit(as.character(obj), " "), use.names = FALSE)
  # which() drops the NA that str_detect() returns for missing words.
  words[which(str_detect(words, term))]
}
# Example usage ----
# Step 1: fetch up to 100 PubMed IDs for articles dated 2013.
id <- getPubMedIds(year = 2013, max = 100)
# Step 2: download the title of each of those articles.
obj <- getArticleTitle(id = id)
# Step 3: list every title word in which "omics" matches.
checkGetTerm(obj = obj, term = "omics")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment