/**
 * Author: Pierre Lindenbaum PhD
 *	plindenbaum@yahoo.fr
 * Date: 2012-11
 * Motivation: RDFGraph from openoffice calc files
 *
 */
package oocalc;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import com.hp.hpl.jena.assembler.assemblers.AssemblerBase;
import com.hp.hpl.jena.assembler.Assembler;
import com.hp.hpl.jena.sparql.core.assembler.AssemblerUtils;
import com.hp.hpl.jena.assembler.Mode;
import com.hp.hpl.jena.datatypes.RDFDatatype;
import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.graph.TripleMatch;
import com.hp.hpl.jena.graph.TripleMatchIterator;
import com.hp.hpl.jena.graph.impl.GraphBase;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.impl.ModelCom;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
import com.hp.hpl.jena.util.iterator.NiceIterator;
import com.hp.hpl.jena.sparql.core.DatasetImpl;
import com.hp.hpl.jena.vocabulary.DC;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.XSD;
import com.hp.hpl.jena.query.Dataset;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.query.*;

/**
 * Implementation of an RDF Graph whose triples are read from
 * OpenOffice Calc (.ods) spreadsheets.
 *
 */
 
public class OpenOfficeCalcGraph
    extends GraphBase
    {
    /** SLF4J logger, registered under the name "ooffice2rdf" */
    protected static final org.slf4j.Logger LOG= LoggerFactory.getLogger("ooffice2rdf");
    /** XML namespaces of the OpenDocument elements/attributes that are parsed (OFFICE, TABLE, TEXT)
     *  and NS, the namespace used for the RDF terms created by this graph */
    private static final String OFFICE="urn:oasis:names:tc:opendocument:xmlns:office:1.0";
    private static final String TABLE="urn:oasis:names:tc:opendocument:xmlns:table:1.0";
    private static final String TEXT="urn:oasis:names:tc:opendocument:xmlns:text:1.0";
    private static final String NS="http://rdf.lindenb.org/";
    /** qualified names of the XML attributes read on rows, cells and tables */
    private static final QName number_columns_repeated=new QName(TABLE,"number-columns-repeated","table");
    private static final QName number_rows_repeated=new QName(TABLE,"number-rows-repeated","table");
    private static final QName value_type=new QName(OFFICE,"value-type","office");
    private static final QName value=new QName(OFFICE,"value","office");
    private static final QName name=new QName(TABLE,"name","table");
    // graph node for the rdf:type predicate
    private static final Node rdfType=Node.createURI(RDF.type.getURI());
    // all the OpenOffice files exposed by this graph (assigned in the constructor)
    private List<File> caclFiles=null;
    
    
    
    /**
     * Shared Assembler instance for OpenOfficeCalcGraph.
     * An assembler creates a Dataset (graph) from an RDF-based configuration file.
     * According to the original author, it is called by Fuseki.
     */
    public static OpenOfficeAssembler assembler = new OpenOfficeAssembler();
    
    
     /**
      * Assembler building a {@link Dataset} backed by an {@link OpenOfficeCalcGraph}
      * from the description found in an RDF configuration resource.
      */
     public static class OpenOfficeAssembler extends  AssemblerBase implements Assembler
         {
         /**
          * Collects every value of the file property (NS+"file") attached to
          * <code>root</code> and wraps the designated spreadsheets in a Dataset.
          *
          * @param a the calling assembler (not used)
          * @param root the configuration resource describing the graph
          * @param mode the assembling mode (not used)
          * @return a Dataset whose default model reads the configured .ods files
          * @throws RuntimeException if a value is not a literal, if the file does
          *         not exist or if its name does not end with ".ods"
          */
         @Override
         public Object open( Assembler a, Resource root, Mode mode )
             {
             //read the configuration and get the files
             List<File> files=new ArrayList<File>();
             StmtIterator iter=root.listProperties(fileRsrc);
             try
                 {
                 while(iter.hasNext())
                     {
                     Statement stmt=iter.nextStatement();
                     if(!stmt.getObject().isLiteral()) throw new RuntimeException("Not a literal "+stmt);
                     String lit=stmt.getString();
                     File file=new File(lit);
                     if(!file.exists()) throw new RuntimeException("File not found : "+file);
                     if(!file.getName().endsWith(".ods")) throw new RuntimeException("Not an .ods file : "+file);
                     files.add(file);
                     }
                 }
             finally
                 {
                 //release the iterator even when one of the checks above fails
                 iter.close();
                 }
             OpenOfficeCalcGraph g=new OpenOfficeCalcGraph(files);
             OpenOfficeCalcModel m=new OpenOfficeCalcModel(g);
             Dataset ds=new DatasetImpl(m);
             return  ds;
             }
         }
    
     /** State of the initializer for Fuseki: true once init() has completed */
      private static boolean init_called = false ;
      /** resource (NS+"build") under which the assembler is registered in init() */
      private static final Resource buildRsrc=ResourceFactory.createResource(NS+"build");
      /** property (NS+"file") whose values are read by the assembler as file paths */
      private static final Property fileRsrc=ResourceFactory.createProperty(NS+"file");
      
    /**
     * Static initializer: as soon as this class is loaded, the assembler is
     * registered in Assembler.general under the resource buildRsrc, so that
     * Fuseki knows about it.
     */
    static { init() ; }

    /** registers the assembler once; later calls do nothing */
    private static void init()
        {
        if(!init_called)
            {
            LOG.info("Calling OpenOfficeCalcGraph init");
            AssemblerUtils.init();
            Assembler.general.implementWith(buildRsrc,assembler);
            init_called=true;
            }
        }
    
    
    /**
     * RDF Model wrapping an OpenOfficeCalcGraph.
     */
    public static class OpenOfficeCalcModel
        extends ModelCom
        {
        /**
         * @param g the spreadsheet graph exposed through this model
         */
        public OpenOfficeCalcModel(OpenOfficeCalcGraph g)
            {
            super(g);
            }
        }
    /** One parsed row of the spreadsheet. */
    private static class Row
        {
        /** number of consecutive times the row occurs (1 unless stated otherwise) */
        int repeat = 1;
        /** cells of the row, in document order */
        private final List<Cell> cells = new ArrayList<Cell>();
        }
    
    /** One parsed cell of the spreadsheet. */
    private static class Cell
        {
        /** number of consecutive times the cell occurs (1 unless stated otherwise) */
        int repeat = 1;
        /** content of the office:value-type attribute, null if absent */
        String type;
        /** content of the office:value attribute, null if absent */
        String value;
        /** text of the cell, null if there is none */
        String literal;
        }
    
    /**
     * Creates a graph over a list of OpenOffice files.
     * The list is copied and the prefixes "office", "xsd" and "dc" are declared.
     *
     * @param calcFiles the spreadsheets to expose
     */
    public OpenOfficeCalcGraph(List<File> calcFiles)
        {
        this.caclFiles=new ArrayList<File>(calcFiles);
        //setNsPrefix answers the prefix mapping itself: the three declarations can be chained
        this.getPrefixMapping()
            .setNsPrefix("office", NS)
            .setNsPrefix("xsd", XSD.getURI())
            .setNsPrefix("dc", DC.getURI());
        }
    
    
    /**
     * Answers the triples of the spreadsheets matching a pattern.
     * A new CellIterator, which parses the files, is created for every call
     * and its triples are filtered with the pattern.
     *
     * @param matcher the pattern to match
     * @return an iterator over the matching triples
     */
    @Override
    protected ExtendedIterator<Triple> graphBaseFind(TripleMatch matcher)
        {
        //asTriple() works for any TripleMatch implementation, a cast to Triple does not
        return new TripleMatchIterator(matcher.asTriple(), new CellIterator());
        }
    
    /** parse the openoffice files and get the Triples */
    private class CellIterator extends NiceIterator<Triple>
        {
        /** index, in the list of OO files, of the current file; -1 before the first file is opened */
        private int fileIndex=-1;
        /** buffer of the triples already created but not yet returned */
        private List<Triple> buffer=new LinkedList<Triple>();
        /** next triple to be returned, null if there is none */
        private Triple next=null;
        /** was hasNext() called since the last call to next() ? */
        private boolean hasNextCalled=false;
        /** current OO file opened */
        private File ioFile=null;
        /** Zip Handler for the current OO file */
        private ZipFile zipFile=null;
        /** Input Stream for the current Zip entry (content.xml) */
        private InputStream zipInputStream;
        /** xml-handler for the current zip entry; null when no file is opened */
        private XMLEventReader xmlEventReader;
        /* rdf subject for the current file */
        private Node fileRsrc=null;
        /* rdf subject for the current tab */
        private Node tabRsrc=null;
        /** current tab index, reset to 0 for each file and incremented for each table */
        private int tabIndex=0;
        /* current column, incremented before each cell: the first column is 1 */
        private int X=0;
        /** current row, incremented before each row: the first row is 1 */
        private int Y=0;
        
        /** appends the triple (s,p,o) at the end of the buffer */
        private void add(Node s,Node p,Node o)
            {
            final Triple triple=Triple.create(s, p, o);
            this.buffer.add(triple);
            }
        
        /** Creates the iterator; nothing is opened here, the first file is opened by hasNext(). */
        public CellIterator()
            {
            }

	/**
	 * Tests the qualified name of a start-element or of an end-element.
	 *
	 * @param evt the event to test
	 * @param ns the expected namespace URI
	 * @param localName the expected local name
	 * @return true if evt starts or ends an element having this namespace and
	 *         this local name, false for any other event
	 */
	private boolean isA(XMLEvent evt,String ns,String localName)
		{
		final QName qname;
		if(evt.isStartElement())
			{
			qname=evt.asStartElement().getName();
			}
		else if(evt.isEndElement())
			{
			qname=evt.asEndElement().getName();
			}
		else
			{
			//neither a start nor an end: there is no name to compare
			return false;
			}
		if(qname==null) return false;
		if(!qname.getNamespaceURI().equals(ns)) return false;
		return qname.getLocalPart().equals(localName);
		}
        
        /**
         * Looks for the next triple, unless this was already done since the last
         * call to next(). The triples are taken from the buffer; when the buffer
         * is empty, the XML of the current file is read further, or the next file
         * is opened, until some triples are produced or no file remains.
         *
         * @return true if a triple is available for next()
         * @throws RuntimeException if a file cannot be opened or parsed. The
         *         resources of the iterator are released before the exception
         *         is thrown.
         */
        @Override
        public boolean hasNext()
            {
            if(hasNextCalled) return next!=null;
            hasNextCalled=true;
            next=null;
            try
                {
                for(;;)
                    {
                    if(!buffer.isEmpty())
                        {
                        next=buffer.remove(0);
                        break;
                        }
                    if(xmlEventReader==null)
                        {
                        //no file is opened: open the next one, if any
                        if(fileIndex+1>=OpenOfficeCalcGraph.this.caclFiles.size()) break;
                        this.fileIndex++;
                        this.tabIndex=0;
                        openCurrentFile();
                        }
                    else if(xmlEventReader.hasNext())
                        {
                        XMLEvent evt=xmlEventReader.nextEvent();
                        if(evt.isStartElement())
                            {
                            handleStartElement(evt.asStartElement());
                            }
                        else if(evt.isEndElement() && isA(evt,TABLE,"table"))
                            {
                            this.tabRsrc=null;
                            }
                        }
                    else //we're done for that file.
                        {
                        closeCurrentFile();
                        }
                    }
                }
            catch(RuntimeException err)
                {
                //don't keep the zip file opened after a failure, don't wrap twice
                close();
                throw err;
                }
            catch(Exception err)
                {
                close();
                throw new RuntimeException(err);
                }
            return next!=null;
            }

        /** opens content.xml in the file at fileIndex and buffers the triples describing that file */
        private void openCurrentFile()
            throws java.io.IOException,XMLStreamException
            {
            //open XML StaX reader for current OO file
            XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
            xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.TRUE);
            xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
            xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
            //a spreadsheet has no reason to pull external entities: don't resolve them
            xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

            this.ioFile=OpenOfficeCalcGraph.this.caclFiles.get(this.fileIndex);
            this.zipFile=new ZipFile(this.ioFile);
            ZipEntry zipEntry=zipFile.getEntry("content.xml");
            if(zipEntry==null) throw new RuntimeException("Cannot get content.xml");
            this.zipInputStream=this.zipFile.getInputStream(zipEntry);
            this.xmlEventReader= xmlInputFactory.createXMLEventReader(this.zipInputStream);
            //describe the file as RDF
            this.fileRsrc=Node.createURI(this.ioFile.toURI().toASCIIString());
            add(this.fileRsrc,rdfType,Node.createURI(NS+"Spreadsheet"));
            add(this.fileRsrc,Node.createURI(DC.title.getURI()),Node.createLiteral(this.ioFile.getName()));
            }

        /** closes the current file once its XML has been fully read; the next call to hasNext() opens the next file */
        private void closeCurrentFile()
            throws java.io.IOException,XMLStreamException
            {
            this.xmlEventReader.close();
            this.zipInputStream.close();
            this.zipFile.close();
            this.xmlEventReader=null;
            this.zipInputStream=null;
            this.zipFile=null;
            this.fileRsrc=null;
            this.ioFile=null;
            }

        /** buffers the triples of a table:table or of a table:table-row; any other element is ignored */
        private void handleStartElement(StartElement E)
            throws XMLStreamException
            {
            if(isA(E,TABLE,"table"))
                {
                Attribute att=E.getAttributeByName(name);
                this.tabIndex++;
                //describe the tab as RDF
                this.tabRsrc=Node.createURI(this.fileRsrc.getURI()+"/t"+tabIndex);
                add(this.tabRsrc,Node.createURI(NS+"file"),this.fileRsrc);
                add(this.tabRsrc,rdfType,Node.createURI(NS+"Table"));
                //a table without name gets no title instead of a NullPointerException
                if(att!=null)
                    {
                    add(this.tabRsrc,Node.createURI(DC.title.getURI()),Node.createLiteral(att.getValue()));
                    }
                this.X=0;
                this.Y=0;
                }
            else if(isA(E,TABLE,"table-row"))
                {
                emitRow(parseRow(E));
                }
            }

        /** buffers the triples of every non-empty cell of a row, for each repetition of that row */
        private void emitRow(Row row)
            {
            int width=0;
            boolean hasContent=false;
            for(Cell cell:row.cells)
                {
                if(cell.repeat<=0) continue;
                width+=cell.repeat;
                if(cell.value!=null || cell.literal!=null) hasContent=true;
                }
            if(!hasContent)
                {
                //rows without content can be repeated a huge number of times:
                //move the counters at once instead of visiting every position
                if(row.repeat>0)
                    {
                    this.Y+=row.repeat;
                    this.X=width;
                    }
                return;
                }
            for(int i=0;i< row.repeat;++i)
                {
                this.X=0;
                this.Y++;
                for(Cell cell:row.cells)
                    {
                    if(cell.value==null && cell.literal==null)
                        {
                        //empty cells produce no triple: skip the whole run
                        if(cell.repeat>0) this.X+=cell.repeat;
                        continue;
                        }
                    for(int j=0;j< cell.repeat;++j)
                        {
                        this.X++;
                        emitCell(cell);
                        }
                    }
                }
            }

        /** buffers the triples describing one non-empty cell located at the current X and Y */
        private void emitCell(Cell cell)
            {
            //fileRsrc holds the URI of the current file: no need to compute it again for each cell
            Node subject=Node.createURI(this.fileRsrc.getURI()+"/t"+tabIndex+"/y"+Y+"/x"+X);
            add(subject,Node.createURI(NS+"table"),this.tabRsrc);
            add(subject,rdfType,Node.createURI(NS+"Cell"));

            add(subject,Node.createURI(NS+"X"),Node.createLiteral(String.valueOf(X),null,XSDDatatype.XSDint));
            add(subject,Node.createURI(NS+"Y"),Node.createLiteral(String.valueOf(Y),null,XSDDatatype.XSDint));
            Node cellValue=null;
            if(cell.type!=null && cell.value!=null)
                {
                //typed literal; any type other than "float" and "int" is a string
                XSDDatatype dataType=XSDDatatype.XSDstring;
                if(cell.type.equals("float"))
                    {
                    dataType=XSDDatatype.XSDfloat;
                    }
                else if(cell.type.equals("int"))
                    {
                    dataType=XSDDatatype.XSDint;
                    }
                cellValue=Node.createLiteral(cell.value, null, dataType);
                }
            else
                {
                cellValue=Node.createLiteral(String.valueOf(cell.literal));
                }
            add( subject,
                 Node.createURI(NS+"value"),
                 cellValue
                 );
            }
        
        /**
         * Releases the XML reader, the input stream and the zip file of the
         * current file, empties the buffer and prevents any other file from
         * being opened. A failure while closing is logged and the other
         * resources are still released. Calling it again is harmless.
         */
        @Override
        public void close()
            {
            try { if(this.xmlEventReader!=null) this.xmlEventReader.close(); }
            catch (Exception e) { LOG.warn("Cannot close the XML reader of "+this.ioFile, e); }
            this.xmlEventReader=null;
            try { if(this.zipInputStream!=null) this.zipInputStream.close(); }
            catch (Exception e) { LOG.warn("Cannot close the input stream of "+this.ioFile, e); }
            this.zipInputStream=null;
            try { if(this.zipFile!=null) this.zipFile.close(); }
            catch (Exception e) { LOG.warn("Cannot close the zip file "+this.ioFile, e); }
            this.zipFile=null;
            //nothing is opened anymore: drop the references describing the last file
            this.fileRsrc=null;
            this.tabRsrc=null;
            this.ioFile=null;
            this.buffer.clear();
            //hasNext() will not find any other file to open
            this.fileIndex=caclFiles.size();
            }
        
        /**
         * Returns the triple found by hasNext() and forgets it, so that the next
         * call to hasNext() looks for another one.
         *
         * @return the next triple
         * @throws java.util.NoSuchElementException if there is no more triple,
         *         as required by the contract of java.util.Iterator
         */
        @Override
        public Triple next()
            {
            //hasNext() does the look-ahead itself when it was not already done
            if(!hasNext()) throw new java.util.NoSuchElementException("no more triple");
            Triple t=next;
            next=null;
            hasNextCalled=false;
            return t;
            }

        
        /**
         * Reads a table:table-row up to its end-element.
         *
         * @param root the start-element of the row, already consumed
         * @return the row with its repetition count and its cells
         * @throws XMLStreamException if the XML cannot be read
         */
        private Row parseRow(StartElement root)
                throws XMLStreamException
                {
                final Row parsed=new Row();
                final Attribute repeatAtt=root.getAttributeByName(number_rows_repeated);
                if(repeatAtt!=null) parsed.repeat=Integer.parseInt(repeatAtt.getValue());

                while(this.xmlEventReader.hasNext())
                    {
                    final XMLEvent event=this.xmlEventReader.nextEvent();
                    if(event.isStartElement())
                        {
                        //only the cells are of interest, other elements are skipped
                        if(isA(event,TABLE,"table-cell"))
                            {
                            parsed.cells.add(parseCell(event.asStartElement()));
                            }
                        continue;
                        }
                    //the end of the row ends the loop
                    if(event.isEndElement() && isA(event,TABLE,"table-row")) break;
                    }
                return parsed;
                }
 	/**
         * Reads a table:table-cell up to its end-element.
         * The literal of the cell is the office:value attribute, replaced by the
         * content of each text:p found in the cell.
         *
         * @param root the start-element of the cell, already consumed
         * @return the cell with its repetition count, type, value and literal
         * @throws XMLStreamException if the XML cannot be read
         */
        private Cell parseCell(StartElement root)
                throws XMLStreamException
                {
                final Cell parsed=new Cell();

                final Attribute repeatAtt=root.getAttributeByName(number_columns_repeated);
                if(repeatAtt!=null) parsed.repeat=Integer.parseInt(repeatAtt.getValue());

                final Attribute typeAtt=root.getAttributeByName(value_type);
                if(typeAtt!=null) parsed.type=typeAtt.getValue();

                final Attribute valueAtt=root.getAttributeByName(value);
                if(valueAtt!=null)
                    {
                    parsed.value=valueAtt.getValue();
                    parsed.literal=parsed.value;
                    }

                for(;;)
                    {
                    if(!this.xmlEventReader.hasNext()) break;
                    final XMLEvent event=this.xmlEventReader.nextEvent();
                    if(event.isStartElement())
                        {
                        //a paragraph gives the text of the cell
                        if(isA(event,TEXT,"p")) parsed.literal=parseText(event.asStartElement());
                        continue;
                        }
                    //the end of the cell ends the loop
                    if(event.isEndElement() && isA(event,TABLE,"table-cell")) break;
                    }
                return parsed;
                }
               
         /**
          * Returns the text of a text:p.
          * Elements nested in the paragraph (a text:span or a text:a for example)
          * no longer abort the parsing: their text is appended to the result.
          * text:s is replaced with as many spaces as its text:c attribute states
          * (one if there is no such attribute), text:tab with a tabulation and
          * text:line-break with a new line.
          *
          * @param root the start-element of the paragraph, already consumed
          * @return the text found up to the end of the paragraph
          * @throws XMLStreamException if the XML cannot be read
          * @throws IllegalStateException if the end of the paragraph is not found
          */
        private String parseText(StartElement root)
            throws XMLStreamException
            {
            StringBuilder b=new StringBuilder();
            //number of nested elements currently opened in the paragraph
            int depth=0;
            while(xmlEventReader.hasNext())
                {
                XMLEvent evt=this.xmlEventReader.nextEvent();
                if(evt.isStartElement())
                    {
                    depth++;
                    StartElement child=evt.asStartElement();
                    if(isA(child,TEXT,"s"))
                        {
                        Attribute count=child.getAttributeByName(new QName(TEXT,"c"));
                        int n=(count==null?1:Integer.parseInt(count.getValue()));
                        for(int i=0;i< n;++i) b.append(' ');
                        }
                    else if(isA(child,TEXT,"tab"))
                        {
                        b.append('\t');
                        }
                    else if(isA(child,TEXT,"line-break"))
                        {
                        b.append('\n');
                        }
                    }
                else if(evt.isEndElement())
                    {
                    //every nested element is closed: this is the end of the paragraph
                    if(depth==0)
                        {
                        return b.toString();
                        }
                    depth--;
                    }
                else if(evt.isCharacters())
                    {
                    b.append(evt.asCharacters().getData());
                    }
                }
            throw new IllegalStateException("end of text:p not found");
            }
        
        }
    /**
     * Command line: runs a SPARQL SELECT query over some spreadsheets and
     * prints the result on the standard output.
     *
     * @param args the file of the query, followed by one or more .ods files
     * @throws Exception if the query cannot be read or executed
     */
    public static void main(String[] args) throws Exception
        {
        if(args.length<2)
            {
            System.err.println("Usage: query.sparql file1.ods, file2.ods... filen.ods");
            return;
            }

        //every argument after the query is a spreadsheet
        final List<File> files=new ArrayList<File>();
        int optind=1;
        while(optind< args.length)
            {
            files.add(new File(args[optind]));
            optind++;
            }
        final OpenOfficeCalcGraph g=new OpenOfficeCalcGraph(files);
        final OpenOfficeCalcModel m=new OpenOfficeCalcModel(g);

        final com.hp.hpl.jena.query.Query query = QueryFactory.read(args[0]) ;
        LOG.info("starting query");
        final QueryExecution qexec = QueryExecutionFactory.create(query, m) ;
        try
            {
            final ResultSet results = qexec.execSelect();
            ResultSetFormatter.out(System.out,results,g.getPrefixMapping());
            }
        finally
            {
            qexec.close() ;
            }
        }
    }