Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created September 24, 2010 21:42
Show Gist options
  • Save lindenb/596106 to your computer and use it in GitHub Desktop.
Save lindenb/596106 to your computer and use it in GitHub Desktop.
/**
* Author:
* Pierre Lindenbaum PhD
* plindenbaum@yahoo.fr
*
* Compile:
*   javac PubmedPerYear.java
* Execute:
*   java PubmedPerYear "Wikipedia"
* Example output (year, then count):
*   2005 1
*   2006 3
*   2007 9
*   2008 15
*   2009 17
*   2010 13
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLEncoder;
import java.util.Map;
import java.util.TreeMap;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.stream.events.XMLEvent;
/**
 * PubmedPerYear
 *
 * Command-line tool: runs a PubMed query through the NCBI E-utilities and
 * prints one line per year ("year TAB count") to standard output, where
 * the count is the number of Year elements found inside DateCreated elements
 * of the fetched records.
 *
 * The work is done in two HTTP calls:
 * 1. esearch (usehistory=y) to obtain the hit count plus the
 *    QueryKey/WebEnv pair identifying the result set,
 * 2. efetch to download the records of that result set as XML.
 */
public class PubmedPerYear
{
    /**
     * Common prefix of the E-utilities endpoints. HTTPS is used because the
     * NCBI services are only offered over HTTPS.
     */
    private static final String EUTILS_BASE="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

    /** The PubMed query, built from the command-line arguments. */
    private String query="";

    private PubmedPerYear()
    {
    }

    /** Values extracted from an esearch reply. */
    static final class SearchResult
    {
        /** Text of the QueryKey element, or null if the reply had none. */
        final String queryKey;
        /** Text of the WebEnv element, or null if the reply had none. */
        final String webEnv;
        /** Value of the first Count element, or -1 if the reply had none. */
        final int count;

        SearchResult(String queryKey,String webEnv,int count)
        {
            this.queryKey=queryKey;
            this.webEnv=webEnv;
            this.count=count;
        }
    }

    /**
     * Creates a StAX event reader over the given stream.
     * DTD support and validation are switched off; text is coalesced.
     *
     * Closing the returned reader does not close {@code in}: the caller
     * stays responsible for closing the stream.
     *
     * @param in the XML input
     * @return a new event reader
     * @throws XMLStreamException if the reader cannot be created
     */
    private static XMLEventReader newReader(InputStream in) throws XMLStreamException
    {
        XMLInputFactory factory=XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
        factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
        factory.setProperty(XMLInputFactory.IS_VALIDATING, Boolean.FALSE);
        factory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        return factory.createXMLEventReader(in);
    }

    /**
     * Reads an esearch XML reply and extracts QueryKey, WebEnv and Count.
     *
     * @param in the esearch XML; not closed by this method
     * @return the extracted values (see {@link SearchResult} for defaults)
     * @throws XMLStreamException if the XML cannot be parsed
     * @throws NumberFormatException if the Count text is not an integer
     */
    static SearchResult parseSearchResult(InputStream in) throws XMLStreamException
    {
        String queryKey=null;
        String webEnv=null;
        int count=-1;
        XMLEventReader reader=newReader(in);
        try
        {
            while(reader.hasNext())
            {
                XMLEvent evt=reader.nextEvent();
                if(!evt.isStartElement()) continue;
                String tag=evt.asStartElement().getName().getLocalPart();
                if(tag.equals("QueryKey"))
                {
                    queryKey=reader.getElementText().trim();
                }
                else if(tag.equals("WebEnv"))
                {
                    webEnv=reader.getElementText().trim();
                }
                // only the first Count is kept; any later Count element is ignored
                else if(tag.equals("Count") && count==-1)
                {
                    count=Integer.parseInt(reader.getElementText().trim());
                }
            }
        }
        finally
        {
            reader.close();
        }
        return new SearchResult(queryKey,webEnv,count);
    }

    /**
     * Reads an efetch XML reply and counts, per year, the Year elements
     * located inside DateCreated elements.
     *
     * A Year whose text is not an integer is reported on standard error and
     * skipped. Malformed XML is not skipped: it raises XMLStreamException.
     *
     * NOTE(review): this relies on DateCreated being present in the efetch
     * XML; confirm against the current PubMed DTD.
     *
     * @param in the efetch XML; not closed by this method
     * @return year to count, sorted by year; empty if nothing was found
     * @throws XMLStreamException if the XML cannot be parsed
     */
    static Map<Integer,Integer> countArticlesPerYear(InputStream in) throws XMLStreamException
    {
        Map<Integer,Integer> year2count=new TreeMap<Integer,Integer>();
        XMLEventReader reader=newReader(in);
        try
        {
            // true while the cursor is between <DateCreated> and </DateCreated>
            boolean inDateCreated=false;
            while(reader.hasNext())
            {
                XMLEvent evt=reader.nextEvent();
                if(evt.isStartElement())
                {
                    String name=evt.asStartElement().getName().getLocalPart();
                    if(name.equals("DateCreated"))
                    {
                        inDateCreated=true;
                    }
                    else if(inDateCreated && name.equals("Year"))
                    {
                        String text=reader.getElementText().trim();
                        try
                        {
                            Integer year=Integer.valueOf(text);
                            Integer seen=year2count.get(year);
                            year2count.put(year,seen==null?1:seen+1);
                        }
                        catch(NumberFormatException numerr)
                        {
                            System.err.println(numerr.getMessage());
                        }
                    }
                }
                else if(evt.isEndElement() &&
                    evt.asEndElement().getName().getLocalPart().equals("DateCreated"))
                {
                    inDateCreated=false;
                }
            }
        }
        finally
        {
            reader.close();
        }
        return year2count;
    }

    /**
     * Runs the query and prints the per-year counts to standard output.
     *
     * @throws IOException on network failure, or if esearch reports hits but
     *         gives no WebEnv/QueryKey to fetch them with
     * @throws XMLStreamException if a reply cannot be parsed
     */
    private void run() throws IOException,XMLStreamException
    {
        URL url=new URL(
            EUTILS_BASE+"esearch.fcgi?db=pubmed&term="+
            URLEncoder.encode(this.query, "UTF-8")+
            "&retstart=0&retmax=0&usehistory=y&retmode=xml&email=plindenbaum_at_yahoo.fr&tool=gender");
        SearchResult found;
        InputStream in=url.openStream();
        try
        {
            found=parseSearchResult(in);
        }
        finally
        {
            in.close();
        }

        Map<Integer,Integer> year2count;
        if(found.count>0)
        {
            if(found.webEnv==null || found.queryKey==null)
            {
                throw new IOException("esearch reported "+found.count+
                    " hit(s) but returned no WebEnv/QueryKey");
            }
            // NOTE(review): the whole result set is requested in one call
            // (retmax=count); confirm this is accepted for very large result sets.
            url=new URL(EUTILS_BASE+"efetch.fcgi?db=pubmed&WebEnv="+
                URLEncoder.encode(found.webEnv,"UTF-8")+
                "&query_key="+URLEncoder.encode(found.queryKey,"UTF-8")+
                "&retmode=xml&retmax="+found.count+"&email=plindenbaum_at_yahoo.fr&tool=mail");
            in=url.openStream();
            try
            {
                year2count=countArticlesPerYear(in);
            }
            finally
            {
                in.close();
            }
        }
        else
        {
            year2count=new TreeMap<Integer,Integer>();
        }

        for(Map.Entry<Integer,Integer> entry:year2count.entrySet())
        {
            System.out.println(""+entry.getKey()+"\t"+entry.getValue());
        }
    }

    /**
     * Entry point. All arguments are joined with single spaces to form the
     * query; an empty query prints "Query is empty" on standard error.
     * Any failure is reported as a stack trace on standard error.
     *
     * @param args the words of the PubMed query
     */
    public static void main(String[] args)
    {
        try
        {
            PubmedPerYear app=new PubmedPerYear();
            StringBuilder joined=new StringBuilder();
            for(String arg:args)
            {
                if(joined.length()>0) joined.append(' ');
                joined.append(arg);
            }
            app.query=joined.toString().trim();
            if(app.query.isEmpty())
            {
                System.err.println("Query is empty");
                return;
            }
            app.run();
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment