Skip to content

Instantly share code, notes, and snippets.

@leipzig
Created January 19, 2012 02:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save leipzig/1637248 to your computer and use it in GitHub Desktop.
Save leipzig/1637248 to your computer and use it in GitHub Desktop.
When can we expect the last damn microarray?
library("plyr")
library("XML")
library("ggplot2")
#Concatenate SQL-style: behaves like paste(), except that the result is NULL
#as soon as any argument is NULL/zero-length or contains an NA.
# ...      : values to join; handed to paste() unchanged
# sep      : separator forwarded to paste() (default "", i.e. none)
# collapse : forwarded to paste() (default NULL)
# Returns the character vector produced by paste(), or NULL as described above.
concat<-function(...,sep="",collapse=NULL){
  strings<-list(...)
  #base vapply instead of plyr::llply: no package is needed just to take lengths
  hasEmpty<-any(vapply(strings,length,integer(1))==0)
  hasNA<-any(is.na(unlist(strings)))
  #NULL, NA
  if(hasEmpty||hasNA){
    return(NULL)
  }
  do.call("paste", c(strings, list(sep = sep, collapse = collapse)))
}
#Build a PubMed hit counter for one search term.
# term : Entrez query fragment, e.g. 'microarray[title]'
# Returns function(year): runs the esearch query "<term>+<year>[pdat]" against
# db=pubmed and returns the text of the <Count> element of the eSearchResult
# as a numeric.
getCount<-function(term){
  function(year){
    nihUrl<-concat("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=",term,"+",year,"[pdat]")
    #example of a fully escaped query:
    #https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=microarray%5btitle%5d+2003%5bpdat%5d
    #The endpoint is https; xmlTreeParse(isURL=TRUE) only fetches http/ftp,
    #so download the response with a base R connection and parse it as text.
    con<-url(URLencode(nihUrl))
    on.exit(close(con))
    response<-paste(readLines(con,warn=FALSE),collapse="\n")
    xml<-xmlTreeParse(response,asText=TRUE)
    #Data Mashups in R, pg17
    countNode<-xml$doc$children$eSearchResult$children$Count
    as.numeric(xmlValue(countNode$children$text))
  }
}
#Observed counts: one PubMed query per year for each of the two search terms
years<-1995:2011
#build each counter once instead of once per year
micCount<-getCount('microarray[title]')
ngsCount<-getCount('"next generation sequencing"[title] OR "high-throughput sequencing"[title]')
#vapply fails loudly if a query does not yield exactly one number
df<-data.frame(type="obs",year=years,
  mic=vapply(years,micCount,numeric(1)),
  ngs=vapply(years,ngsCount,numeric(1))
)
#97 is a fair start
df<-subset(df,year>=1997)
#Long format: one row per (year, series), columns type/year/citation/value.
#Built with base R: melt(..., variable_name=) belongs to the reshape package,
#which is not among the packages loaded with library() in this script.
mdf<-data.frame(
  type=rep(df$type,times=2),
  year=rep(df$year,times=2),
  citation=factor(rep(c("mic","ngs"),each=nrow(df)),levels=c("mic","ngs")),
  value=c(df$mic,df$ngs)
)
#named obsPlot rather than c so that base::c is not shadowed
obsPlot<-ggplot(mdf,aes(x=year))
#points for both series; the loess smoother is drawn for the microarray series only
p<-obsPlot+geom_point(aes(y=value,color=citation),size=3) +
  ylab("papers") +
  stat_smooth(aes(y=value,color=citation),data=subset(mdf,citation=="mic"),method="loess") +
  scale_x_continuous(breaks=seq(from=1997,to=2011,by=2))
print(p)
#Return 0 for negative elements
# noNeg(c(3,2,1,0,-1,-2,2))
# [1] 3 2 1 0 0 0 2
# v : numeric vector
# Returns a numeric vector of the same length; NA elements stay NA.
#pmax is vectorized, so empty input yields an empty numeric vector
#(an element-wise sapply would return list() there).
noNeg<-function(v){
  pmax(v,0)
}
#Return up to the first negative/zero element inclusive, with negatives set to 0
# toZeroNoNeg(c(3,2,1,0,-1,-2,2))
# [1] 3 2 1 0
# v : numeric vector
#If v has no zero or negative element the whole vector is returned
#(indexing with 1:NA would stop with "NA/NaN argument").
toZeroNoNeg<-function(v){
  #clamp once and reuse the result
  clamped<-pmax(v,0)
  zeroAt<-which(clamped==0)
  if(length(zeroAt)==0){
    return(clamped)
  }
  clamped[seq_len(zeroAt[1])]
}
#return index of first zero, counting negative elements as zero
# firstZero(c(3,2,1,0,-1,-2,2))
# [1] 4
# v : numeric vector
# Returns NA when v has no zero or negative element.
#v<=0 is the same test as "clamped to zero == 0", without clamping every element first
firstZero<-function(v){
  which(v<=0)[1]
}
#let's peer into the future
#surface="direct" fits exactly at the requested points, so the model can be
#evaluated for years beyond the observed range
df.lo.mic<-loess(mic ~ year,df,control=loess.control(surface="direct"))
#when will it stop?
mic_predict<-as.integer(predict(df.lo.mic,data.frame(year=2012:2020),se=FALSE))
zeroIdx<-firstZero(mic_predict)
#without this check 2012:zero_year below stops with an opaque "NA/NaN argument"
if(is.na(zeroIdx)){
  stop("LOESS does not project microarray papers reaching zero by 2020",call.=FALSE)
}
#mic_predict[1] is 2012, so index i corresponds to year 2011+i
zero_year<-2011+zeroIdx
#projected counts from 2012 up to and including the first zero year
mic_future<-toZeroNoNeg(mic_predict)
#trailing newline keeps the two messages on separate lines
cat(concat("LOESS projects ",sum(mic_future)," more damn microarray papers.","\n"))
cat(concat("The last damn microarray paper is projected to be in ",(zero_year-1),".","\n"))
#predict ngs growth
df.lo.ngs<-loess(ngs ~ year,df,control=loess.control(surface="direct"))
ngs_predict<-as.integer(predict(df.lo.ngs,data.frame(year=2012:zero_year),se=FALSE))
pred_df<-data.frame(type="pred",year=c(2012:zero_year),mic=mic_future,ngs=ngs_predict)
#observed rows followed by projected rows
df2<-rbind(df,pred_df)
#Long format: one row per (year, series), columns type/year/citation/value.
#Built with base R: melt(..., variable_name=) belongs to the reshape package,
#which is not among the packages loaded with library() in this script.
mdf2<-data.frame(
  type=rep(df2$type,times=2),
  year=rep(df2$year,times=2),
  citation=factor(rep(c("mic","ngs"),each=nrow(df2)),levels=c("mic","ngs")),
  value=c(df2$mic,df2$ngs)
)
projPlot<-ggplot(mdf2,aes(x=year))
#point shape separates observed ("obs") from projected ("pred") values
p2<-projPlot+geom_point(aes(y=value,color=citation,shape=type),size=3) +
  ylab("papers") +
  scale_y_continuous(breaks=seq(from=0,to=1600,by=200))+
  scale_x_continuous(breaks=seq(from=1997,to=zero_year,by=2))
print(p2)
@leipzig
Copy link
Author

leipzig commented Jan 10, 2013

Thanks for your tips.

I have created this repository that includes the Data Mashups in R code using the wretched Yahoo! BOSS API. This includes the supplemental census and shapefiles that have gone missing:
https://github.com/leipzig/datamashupsinr

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment