Skip to content

Instantly share code, notes, and snippets.

@hacksoldier
Last active March 1, 2016 11:48
Show Gist options
  • Save hacksoldier/7157a38747bd9bea9dfa to your computer and use it in GitHub Desktop.
Save hacksoldier/7157a38747bd9bea9dfa to your computer and use it in GitHub Desktop.
Metodo per prendere e convertire un file doc in HTML. Il file verrà restituito tramite string.
/**
* Copyright (c) 2015 Marco Velluto
* Warning! This only works with legacy .doc files; it does NOT work with .docx files.
*/
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.w3c.dom.Document;
/**
 * Converts a legacy Word ({@code .doc}) file to HTML and returns the markup as a string.
 *
 * <p>The document is loaded through {@link WordToHtmlUtils#loadDoc}, converted into a DOM by
 * {@link WordToHtmlConverter}, and serialized with a JAXP {@link Transformer} configured for
 * indented, UTF-8 encoded HTML output.
 *
 * <p>{@link IOException}, {@link ParserConfigurationException} and {@link TransformerException}
 * are logged and not rethrown; when one of them interrupts the conversion the method returns
 * {@code null}.
 *
 * @param docPath file-system path of the {@code .doc} file to convert
 * @return the generated HTML, or {@code null} if the conversion did not complete
 */
private String convertDocToHTML(String docPath)
{
    String result = null;
    // try-with-resources guarantees the input file handle is released on every path,
    // including when loading or conversion throws.
    try (InputStream docStream = new FileInputStream(docPath))
    {
        HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(docStream);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        // ByteArrayOutputStream holds no system resource, so it needs no explicit close.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));

        // Decode with the same charset the serializer was told to write; relying on the
        // platform default would corrupt non-ASCII characters on non-UTF-8 systems.
        result = new String(out.toByteArray(), StandardCharsets.UTF_8);
    }
    catch (IOException e)
    {
        log.error("errore in convertDocToHTML - Errore durante il load del documento a questo path " + docPath, e);
    }
    catch (ParserConfigurationException e)
    {
        log.error("errore in convertDocToHTML - Errore durante la creazione dell'oggetto WordToHtmlConverter ", e);
    }
    catch (TransformerConfigurationException e)
    {
        log.error("errore in convertDocToHTML - Errore durante la creazione dell'oggetto Transformer ", e);
    }
    catch (TransformerException e)
    {
        log.error("errore in convertDocToHTML - Errore durante la serializzazione dell'ogetto con domSource e streamResult ", e);
    }
    return result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment