Skip to content

Instantly share code, notes, and snippets.

@lindenb
Created July 19, 2011 22:03
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lindenb/1093853 to your computer and use it in GitHub Desktop.
Save lindenb/1093853 to your computer and use it in GitHub Desktop.
SVG Timeline from http://data.bnf.fr
/**
* Author:
* Pierre Lindenbaum PhD
* Date:
* July-2011
* Contact:
* plindenbaum@yahoo.fr
* Reference:
*
* WWW:
* http://plindenbaum.blogspot.com
* Motivation:
* timeline from http://data.bnf.fr
*
*/
import java.awt.Dimension;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class BNFTimeline
{
/** Edge length, in pixels, of the square reserved for an author's portrait. */
static final int ICON_SIZE=64;
/** Height, in pixels, of the rectangle drawn for one author. */
static final int AUTHOR_HEIGHT=ICON_SIZE+12;
/** Gap, in pixels, used between rows and around the icon and the text. */
static final int MARGIN=5;
/** Namespace URI of SVG, declared on the root element of the output. */
private static final String SVG="http://www.w3.org/2000/svg";
/** Namespace URI bound to the "cubicweb" XPath prefix. */
private static final String CubicWeb="http://www.logilab.org/2008/cubicweb";
/** Namespace URI of XHTML, bound to the "h" XPath prefix. */
private static final String HTML="http://www.w3.org/1999/xhtml";
/** Class logger; never reassigned, hence final. */
private static final Logger LOG=Logger.getLogger(BNFTimeline.class.getName());
/** DOM parser, created in the constructor. */
private DocumentBuilder docBuilder=null;
/** XPath engine, created in the constructor. */
private XPath xpath=null;
/** Maps each XPath prefix to its namespace URI; filled in the constructor. */
private final Map<String, String> prefix2uri=new HashMap<String, String>();
/** Left edge of the time axis, in the unit of Date.days(); null until authors are loaded. */
private Double minDays=null;
/** Right edge of the time axis, in the unit of Date.days(); null until authors are loaded. */
private Double maxDays=null;
/**
 * A calendar date whose month and day may be unknown.
 * Instances are ordered by the approximate day count returned by days().
 */
private static class Date implements Comparable<Date>
{
    /** The text this date was parsed from. */
    String literal;
    /** Year; always set on a parsed date. */
    int year;
    /** Month, or null when it is not known. */
    Integer month;
    /** Day of the month, or null when it is not known. */
    Integer day;

    /** Orders two dates by their day count: -1, 0 or 1. */
    @Override
    public int compareTo(Date o)
    {
        final double mine = days();
        final double theirs = o.days();
        if (mine < theirs)
        {
            return -1;
        }
        if (mine > theirs)
        {
            return 1;
        }
        return 0;
    }

    /**
     * Approximate position of this date on a linear axis, in days.
     * A year counts for 365.25 days and a month for one twelfth of that;
     * the day is only taken into account when the month is known.
     */
    public double days()
    {
        double total = year*365.25;
        if (month == null)
        {
            return total;
        }
        total += (365.25/12.0)*month;
        if (day == null)
        {
            return total;
        }
        return total + day;
    }
}
/**
 * One person drawn on the timeline.
 * This is an inner (non-static) class because the horizontal positions are
 * computed with the enclosing instance's convertDate2Pixel().
 */
private class Author
{
    /** Address of the author's page; also the target of the SVG hyperlink. */
    String url;
    String name;
    String birthPlace;
    Date birthDate;
    String deathPlace;
    Date deathDate;
    String gender;
    /** Short biography, wrapped under the name when there is room for it. */
    String shortBio;
    /** Address of the portrait image; may be empty. */
    String depiction;
    /** Scaled size of the portrait, or null when no portrait is drawn. */
    Dimension iconSize;
    /** Zero-based row of the timeline on which this author is drawn. */
    int y;

    /** @return the horizontal pixel position of the birth date */
    public double x1()
    {
        return convertDate2Pixel(birthDate);
    }

    /** @return the horizontal pixel position of the death date */
    public double x2()
    {
        return convertDate2Pixel(deathDate);
    }

    /**
     * Writes this author as an SVG hyperlink wrapping a black rectangle, the
     * optional portrait, the name with both years and as many biography lines
     * as fit in the rectangle.
     *
     * @param w the writer, positioned inside the root svg element
     * @throws XMLStreamException if the writer fails
     */
    void writeXML(XMLStreamWriter w) throws XMLStreamException
    {
        final double left=x1();
        final double width=x2()-left;

        w.writeStartElement("a");
        w.writeAttribute("xlink:href", this.url);
        w.writeAttribute("xlink:target","_blank");
        w.writeStartElement("g");
        w.writeAttribute("title",String.valueOf(name));
        w.writeAttribute("transform", "translate("+left+","+(MARGIN+y*(AUTHOR_HEIGHT+MARGIN))+")");

        w.writeStartElement("rect");
        w.writeAttribute("style", "fill:black;stroke:white;");
        w.writeAttribute("height", String.valueOf(AUTHOR_HEIGHT));
        w.writeAttribute("width", String.valueOf(width));
        w.writeEndElement();//rect

        // horizontal room left for the text, reduced below when an icon is drawn
        double textLength=width-(MARGIN/2);
        int shift=MARGIN;
        if(this.iconSize!=null)
        {
            // centre the scaled portrait inside its ICON_SIZE x ICON_SIZE square
            w.writeEmptyElement("image");
            w.writeAttribute("x", String.valueOf(MARGIN+(ICON_SIZE-this.iconSize.width)/2));
            w.writeAttribute("y", String.valueOf(MARGIN+(ICON_SIZE-this.iconSize.height)/2));
            w.writeAttribute("width", String.valueOf(this.iconSize.width));
            w.writeAttribute("height", String.valueOf(this.iconSize.height));
            w.writeAttribute("xlink:href",this.depiction);
            shift+=(ICON_SIZE+MARGIN);
            textLength-=(ICON_SIZE+MARGIN);
        }

        w.writeStartElement("g");
        w.writeAttribute("transform", "translate("+shift+",0)");
        w.writeAttribute("style", "stroke:white;fill:white;font-size:14pt;font-weight:normal;");
        w.writeStartElement("text");
        w.writeAttribute("x", "0");
        w.writeAttribute("y", "18");
        w.writeCharacters(this.name+" ("+birthDate.year+" / "+this.deathDate.year+")");
        w.writeEndElement();

        // Work on a local copy so that rendering does not modify the author.
        final String biography=(this.shortBio==null?"":this.shortBio);
        // note: about 123 chars fit in 600px, i.e. roughly 0.2 char/px
        final int maxCharsPerLine=(int)(textLength*0.2);
        // A narrow rectangle (short life span plus an icon) leaves a zero or
        // negative budget: substring() would throw on a negative length and a
        // zero length would only emit empty text elements, so skip the biography.
        if(maxCharsPerLine>0)
        {
            int posY=40;
            int start=0;
            while(start<biography.length() && posY+10 < AUTHOR_HEIGHT)
            {
                final int end=Math.min(biography.length(), start+maxCharsPerLine);
                w.writeStartElement("text");
                w.writeAttribute("style", "font-size:50%;");
                w.writeAttribute("x", "0");
                w.writeAttribute("y", String.valueOf(posY));
                w.writeCharacters(biography.substring(start,end));
                w.writeEndElement();
                posY+=11;
                start=end;
            }
        }
        w.writeEndElement();//g
        w.writeEndElement();//g
        w.writeEndElement();//a
    }
}
/**
 * Prepares the XML tooling: a namespace-aware, non-validating DOM parser
 * whose entity resolver answers every request with an empty document, and
 * an XPath engine that knows the prefixes registered in prefix2uri.
 *
 * @throws Exception if the DOM parser cannot be created
 */
private BNFTimeline() throws Exception
{
    DocumentBuilderFactory f=DocumentBuilderFactory.newInstance();
    f.setCoalescing(true);
    f.setNamespaceAware(true);
    f.setValidating(false);
    f.setExpandEntityReferences(true);
    f.setIgnoringComments(false);
    f.setIgnoringElementContentWhitespace(true);
    this.docBuilder=f.newDocumentBuilder();
    this.docBuilder.setEntityResolver(new EntityResolver()
        {
        @Override
        public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException
            {
            // hand back an empty document instead of loading the entity
            LOG.info("resolve "+publicId+" "+systemId);
            return new InputSource(new StringReader(""));
            }
        });

    this.prefix2uri.put("h", HTML);
    this.prefix2uri.put("cubicweb", CubicWeb);
    this.prefix2uri.put(XMLConstants.XML_NS_PREFIX, XMLConstants.XML_NS_URI);
    this.prefix2uri.put(XMLConstants.XMLNS_ATTRIBUTE, XMLConstants.XMLNS_ATTRIBUTE_NS_URI);
    this.prefix2uri.put("dc","http://purl.org/dc/terms/");
    this.prefix2uri.put("owl","http://www.w3.org/2002/07/owl#");
    this.prefix2uri.put("foaf","http://xmlns.com/foaf/0.1/");
    this.prefix2uri.put("rdagroup2elements","http://RDVocab.info/ElementsGr2/");
    this.prefix2uri.put("rdf","http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    this.prefix2uri.put("skos","http://www.w3.org/2004/02/skos/core#");
    this.prefix2uri.put("xfoaf","http://www.foafrealm.org/xfoaf/0.1/");

    XPathFactory xpathFactory=XPathFactory.newInstance();
    this.xpath=xpathFactory.newXPath();
    this.xpath.setNamespaceContext(new NamespaceContext()
        {
        /** Returns only the prefixes bound to the given URI, as a read-only iterator. */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI)
            {
            if(namespaceURI==null) throw new IllegalArgumentException("namespaceURI is null");
            List<String> bound=new ArrayList<String>();
            for(Map.Entry<String, String> entry:prefix2uri.entrySet())
                {
                if(entry.getValue().equals(namespaceURI)) bound.add(entry.getKey());
                }
            // the interface requires an iterator that does not support remove()
            return Collections.unmodifiableList(bound).iterator();
            }

        /** Returns one prefix bound to the given URI, or null when there is none. */
        @Override
        public String getPrefix(String ns)
            {
            if(ns==null) throw new IllegalArgumentException("namespaceURI is null");
            for(Map.Entry<String, String> entry:prefix2uri.entrySet())
                {
                if(entry.getValue().equals(ns)) return entry.getKey();
                }
            return null;
            }

        /** Returns the URI bound to the prefix, or the empty URI for an unknown prefix. */
        @Override
        public String getNamespaceURI(String prefix)
            {
            if(prefix==null) throw new IllegalArgumentException("prefix is null");
            String u=prefix2uri.get(prefix);
            return (u!=null?u:XMLConstants.NULL_NS_URI);
            }
        });
}
/**
 * Width of the generated drawing.
 *
 * @return the fixed drawing width, in pixels
 */
private int getScreenWidthInPixel()
{
    final int widthInPixels=15000;
    return widthInPixels;
}
/**
 * Maps a date onto the horizontal axis of the drawing by linear
 * interpolation between minDays (pixel 0) and maxDays (full width).
 *
 * @param d the date to place
 * @return the horizontal position, in pixels
 */
private double convertDate2Pixel(Date d)
{
    final double axisStart=this.minDays;
    final double axisEnd=this.maxDays;
    final double fraction=(d.days()-axisStart)/(axisEnd-axisStart);
    return getScreenWidthInPixel()*fraction;
}
/**
 * Builds the timeline and writes it to "output.svg" in the working directory:
 * collects the author pages from the paginated index, loads each author's
 * RDF description, packs the authors into rows and draws them.
 *
 * @throws IllegalStateException if no author with usable dates was found
 * @throws Exception on any download, parsing or output failure
 */
private void parse() throws Exception
{
    List<Author> authors=listAuthors();
    loadAuthorDetails(authors);
    if(authors.isEmpty())
    {
        // minDays/maxDays are still null here; fail with a clear message
        throw new IllegalStateException("No author with usable birth and death dates was found");
    }
    // leave some room on both sides of the time axis
    this.minDays-=360;
    this.maxDays+=360;
    //sort persons on birth-date/death-date
    Collections.sort(authors, new Comparator<Author>()
        {
        @Override
        public int compare(Author o1, Author o2)
            {
            int i=o1.birthDate.compareTo(o2.birthDate);
            if(i!=0) return i;
            return o1.deathDate.compareTo(o2.deathDate);
            }
        });
    int rowCount=assignRows(authors);
    writeSvg(authors,rowCount);
}

/** Scans the index pages one after the other until a page yields no author link. */
private List<Author> listAuthors() throws Exception
{
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    final String prefix="http://data.bnf.fr/";
    XPathExpression expr=this.xpath.compile(".//h:li/h:a[@href]");
    List<Author> authors=new ArrayList<Author>();
    File xmlFile=File.createTempFile("_tmp", ".xml");
    xmlFile.deleteOnExit();
    try
    {
        //scan each index
        for(int pageIndex=1;;++pageIndex)
        {
            URL url=new URL("http://data.bnf.fr/liste-auteurs/page"+pageIndex);
            LOG.info(url.toString());
            tidyToFile(tidy,url,xmlFile);
            Document dom=this.docBuilder.parse(xmlFile);
            NodeList links=(NodeList)expr.evaluate(dom, XPathConstants.NODESET);
            boolean found=false;
            for(int i=0;i< links.getLength();++i)
            {
                String href=Element.class.cast(links.item(i)).getAttribute("href");
                if(!href.startsWith(prefix)) continue;
                if(!href.substring(prefix.length()).matches("[0-9]+/[a-z\\-A-Z_0-9]+/"))
                {
                    LOG.info("ignoring "+href);
                    continue;
                }
                Author author=new Author();
                author.url=href;
                authors.add(author);
                found=true;
            }
            if(!found) break;
        }
    }
    finally
    {
        xmlFile.delete();
    }
    return authors;
}

/** Downloads one page and stores its tidied XHTML in the given file, closing both streams. */
private void tidyToFile(Tidy tidy,URL url,File xmlFile) throws IOException
{
    InputStream in=url.openStream();
    try
    {
        FileOutputStream fout=new FileOutputStream(xmlFile);
        try
        {
            tidy.parse(in,fout);
            fout.flush();
        }
        finally
        {
            fout.close();
        }
    }
    finally
    {
        in.close();
    }
}

/** Fills every author from its RDF description, drops the unusable ones and tracks the date range. */
private void loadAuthorDetails(List<Author> authors) throws Exception
{
    Iterator<Author> iter=authors.iterator();
    while(iter.hasNext())
    {
        Author author=iter.next();
        if(!loadAuthor(author))
        {
            iter.remove();
            continue;
        }
        double born=author.birthDate.days();
        if(this.minDays==null || born<this.minDays)
        {
            this.minDays=born;
        }
        double died=author.deathDate.days();
        if(this.maxDays==null || died>this.maxDays)
        {
            this.maxDays=died;
        }
    }
}

/** Loads one author from "rdf.xml" under its page; returns false when the author cannot be drawn. */
private boolean loadAuthor(Author author) throws Exception
{
    LOG.info(author.url+"rdf.xml");
    Document dom=this.docBuilder.parse(author.url+"rdf.xml");
    Element root=(Element)xpath.evaluate("rdf:RDF/rdf:Description[rdf:type/@rdf:resource='http://xmlns.com/foaf/0.1/Person']",dom,XPathConstants.NODE);
    if(root==null)
    {
        return false; //e.g. "Academie Fr"
    }
    author.name=(String)xpath.evaluate("dc:title[1]", root,XPathConstants.STRING);
    author.birthDate= parseDate((String)xpath.evaluate("rdagroup2elements:dateOfBirth", root,XPathConstants.STRING));
    author.birthPlace = (String)xpath.evaluate("rdagroup2elements:placeOfBirth", root,XPathConstants.STRING);
    author.deathDate = parseDate((String)xpath.evaluate("rdagroup2elements:dateOfDeath", root,XPathConstants.STRING));
    author.deathPlace = (String)xpath.evaluate("rdagroup2elements:placeOfDeath", root,XPathConstants.STRING);
    author.gender = (String)xpath.evaluate("foaf:gender", root,XPathConstants.STRING);
    author.shortBio = (String)xpath.evaluate("rdagroup2elements:biographicalInformation", root,XPathConstants.STRING);
    author.depiction=(String)xpath.evaluate("foaf:depiction/@rdf:resource",root,XPathConstants.STRING);
    if(author.birthDate==null || author.deathDate==null
        || author.deathDate.year<1400 || author.birthDate.year<1400)//TODO
    {
        return false;
    }
    if(author.deathDate.compareTo(author.birthDate)<0)
    {
        // would be drawn as a rectangle with a negative width
        LOG.info("ignoring "+author.url+" : death precedes birth");
        return false;
    }
    if(author.depiction!=null && !author.depiction.trim().isEmpty())
    {
        author.iconSize=getDepictionSize(author.depiction);
    }
    return true;
}

/**
 * Greedily packs the authors into rows: a row starts with the first author
 * still unplaced and is continued with the nearest author starting at
 * least 5 pixels after the previous one ends.
 *
 * @return the number of rows used
 */
private int assignRows(List<Author> authors)
{
    List<Author> remains=new ArrayList<Author>(authors);
    int rowCount=0;
    while(!remains.isEmpty())
    {
        final int row=rowCount++;
        Author last=remains.remove(0);
        last.y=row;
        while(true)
        {
            final double rowEnd=last.x2();
            Author best=null;
            int bestIndex=-1;
            for(int i=0;i< remains.size();++i)
            {
                Author next=remains.get(i);
                if(next.x1()< rowEnd+5) continue;
                if(best==null ||
                    (next.x1()-rowEnd < best.x1()-rowEnd))
                {
                    best=next;
                    bestIndex=i;
                }
            }
            if(best==null) break;
            last=best;
            last.y=row;
            remains.remove(bestIndex);
        }
    }
    return rowCount;
}

/** Writes the SVG document, closing the output file even when writing fails. */
private void writeSvg(List<Author> authors,int rowCount) throws Exception
{
    final int height=MARGIN+(rowCount*(AUTHOR_HEIGHT+MARGIN));
    FileOutputStream fout=new FileOutputStream("output.svg");
    try
    {
        XMLOutputFactory xmlfactory= XMLOutputFactory.newInstance();
        XMLStreamWriter w= xmlfactory.createXMLStreamWriter(fout,"UTF-8");
        w.writeStartDocument("UTF-8","1.0");
        w.writeStartElement("svg");
        w.writeAttribute("xmlns", SVG);
        w.writeAttribute("xmlns:xlink","http://www.w3.org/1999/xlink");
        w.writeAttribute("version","1.1");
        w.writeAttribute("width",String.valueOf(getScreenWidthInPixel()));
        w.writeAttribute("height",String.valueOf(height));
        w.writeAttribute("style", "fill:none;stroke:black;stroke-width:1px;");
        // background
        w.writeEmptyElement("rect");
        w.writeAttribute("x","0");
        w.writeAttribute("y","0");
        w.writeAttribute("width",String.valueOf(getScreenWidthInPixel()-1));
        w.writeAttribute("height",String.valueOf(height-1));
        w.writeAttribute("style", "fill:lightgray;stroke:black;");
        for(Author author:authors)
        {
            author.writeXML(w);
        }
        w.writeEndDocument();//svg
        w.close();
        fout.flush();
    }
    finally
    {
        fout.close();
    }
}
/**
 * Downloads a portrait and computes the size at which it is drawn: the
 * longer side becomes ICON_SIZE and the other side is scaled by the same
 * ratio, but never below one pixel.
 *
 * @param resourceFile the address of the image
 * @return the scaled size, or null when the image cannot be decoded; the
 *         caller stores the result in Author.iconSize, where null means
 *         that no portrait is drawn
 * @throws Exception if the address is malformed or the image cannot be read
 */
private Dimension getDepictionSize(String resourceFile) throws Exception
{
    BufferedImage img=ImageIO.read(new URL(resourceFile));
    if(img==null)
    {
        // ImageIO.read returns null when no registered reader understands the data
        LOG.warning("cannot decode image "+resourceFile);
        return null;
    }
    final int width=img.getWidth();
    final int height=img.getHeight();
    Dimension d=new Dimension();
    if(width< height)
    {
        double ratio= width/(double)height;// <1
        d.width=Math.max(1,(int)(ICON_SIZE*ratio));
        d.height=ICON_SIZE;
    }
    else
    {
        double ratio= height/(double)width;// <=1
        d.height=Math.max(1,(int)(ICON_SIZE*ratio));
        d.width=ICON_SIZE;
    }
    return d;
}
/**
 * Parses a date written as "dd-mm-yyyy", "mm-yyyy" or a year of one to four
 * digits, ignoring surrounding white space.
 *
 * @param s the text to parse, may be null
 * @return the date, or null when the text is null, empty, starts with "-"
 *         or matches none of the supported forms
 */
private Date parseDate(String s)
{
    if(s==null || s.isEmpty())
    {
        return null;
    }
    final String text=s.trim();
    if(text.startsWith("-"))
    {
        return null;
    }
    final Date parsed=new Date();
    // keep the text exactly as received, before trimming
    parsed.literal=s;
    if(text.matches("[0-3][0-9]\\-[0-1][0-9]\\-[0-9][0-9][0-9][0-9]"))
    {
        final String[] parts=text.split("[\\-]");
        parsed.day=Integer.parseInt(parts[0]);
        parsed.month=Integer.parseInt(parts[1]);
        parsed.year=Integer.parseInt(parts[2]);
        return parsed;
    }
    if(text.matches("[0-1][0-9]\\-[0-9][0-9][0-9][0-9]"))
    {
        final String[] parts=text.split("[\\-]");
        parsed.month=Integer.parseInt(parts[0]);
        parsed.year=Integer.parseInt(parts[1]);
        return parsed;
    }
    if(text.matches("[0-9]{1,4}"))
    {
        parsed.year=Integer.parseInt(text);
        return parsed;
    }
    return null;
}
/**
 * Command line entry point. Accepts -h, -help or --help (prints the usage
 * and stops), -L (accepted and ignored) and "--" (end of options); any
 * other option or any remaining argument is reported on the standard error
 * stream and nothing is generated. Every failure is printed as a stack trace.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try
    {
        final BNFTimeline app=new BNFTimeline();
        int consumed=0;
        boolean scanning=true;
        while(scanning && consumed< args.length)
        {
            final String arg=args[consumed];
            if(arg.equals("-h") || arg.equals("-help") || arg.equals("--help"))
            {
                System.err.println("Options:");
                System.err.println(" -h help; This screen.");
                return;
            }
            if(arg.equals("--"))
            {
                // explicit end of options: skip the marker and stop scanning
                consumed++;
                scanning=false;
            }
            else if(arg.equals("-L"))
            {
                // accepted, no effect
                consumed++;
            }
            else if(arg.startsWith("-"))
            {
                System.err.println("Unknown option "+arg);
                return;
            }
            else
            {
                // first non-option argument
                scanning=false;
            }
        }
        if(consumed!=args.length)
        {
            System.err.println("Illegal number of arguments.");
            return;
        }
        app.parse();
    }
    catch(Throwable err)
    {
        err.printStackTrace();
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment