Last active
March 1, 2016 11:48
-
-
Save hacksoldier/7157a38747bd9bea9dfa to your computer and use it in GitHub Desktop.
Metodo per leggere un file .doc e convertirlo in HTML. L'HTML risultante viene restituito come stringa.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Copyright (c) 2015 Marco Velluto
 * Warning! This only works with .doc files; it does NOT work with .docx files.
 */
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.converter.WordToHtmlUtils;
import org.w3c.dom.Document;
private String convertDocToHTML(String docPath) | |
{ | |
ByteArrayOutputStream out = null; | |
String result = null; | |
try | |
{ | |
out = new ByteArrayOutputStream(); | |
HWPFDocumentCore wordDocuemnt = WordToHtmlUtils.loadDoc(new FileInputStream(docPath)); | |
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder() | |
.newDocument()); | |
wordToHtmlConverter.processDocument(wordDocuemnt); | |
Document htmlDocument = wordToHtmlConverter.getDocument(); | |
DOMSource domSource = new DOMSource(htmlDocument); | |
StreamResult streamResult = new StreamResult(out); | |
TransformerFactory transformerFactory = TransformerFactory.newInstance(); | |
Transformer serializer = transformerFactory.newTransformer(); | |
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); | |
serializer.setOutputProperty(OutputKeys.INDENT, "yes"); | |
serializer.setOutputProperty(OutputKeys.METHOD, "html"); | |
serializer.transform(domSource, streamResult); | |
result = new String(out.toByteArray()); | |
} | |
catch (IOException e) | |
{ | |
log.error("errore in convertDocToHTML - Errore durante il load del documento a questo path " + docPath, e); | |
} | |
catch (ParserConfigurationException e) | |
{ | |
log.error("errore in convertDocToHTML - Errore durante la creazione dell'oggetto WordToHtmlConverter ", e); | |
} | |
catch (TransformerConfigurationException e) | |
{ | |
log.error("errore in convertDocToHTML - Errore durante la creazione dell'oggetto Transformer ", e); | |
} | |
catch (TransformerException e) | |
{ | |
log.error("errore in convertDocToHTML - Errore durante la serializzazione dell'ogetto con domSource e streamResult ", e); | |
} | |
finally | |
{ | |
if (out != null) | |
{ | |
try | |
{ | |
out.close(); | |
} | |
catch (IOException e) | |
{} | |
} | |
} | |
return result; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment