Created
October 7, 2012 04:11
-
-
Save katoy/3847076 to your computer and use it in GitHub Desktop.
prototype for xlsx -> html using apache poi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
prototype for xlsx -> html using apache poi. | |
Book1.xlsx を OpenOffice で表示した様子と、変換結果 HEML を chrome で表示した様子を | |
スクリーンショットで示しています。 | |
空セル/空行も表示する、罫線、セル結合を反映する といった変更を施しています。 | |
(現状では、罫線の属性値は正しいものではありません) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// See | |
// http://code.google.com/p/office-to-html/source/browse/trunk/src/org/apache/poi/xssf/converter/XlsxConverter.java?r=3 | |
package com.katoy; | |
import com.sun.org.apache.xml.internal.serializer.OutputPropertiesFactory; | |
import java.io.File; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.OutputStreamWriter; | |
import java.io.StringWriter; | |
import java.math.BigDecimal; | |
import java.math.MathContext; | |
import java.util.Iterator; | |
import java.util.List; | |
import javax.xml.parsers.DocumentBuilderFactory; | |
import javax.xml.parsers.ParserConfigurationException; | |
import javax.xml.transform.OutputKeys; | |
import javax.xml.transform.Transformer; | |
import javax.xml.transform.TransformerException; | |
import javax.xml.transform.TransformerFactory; | |
import javax.xml.transform.dom.DOMSource; | |
import javax.xml.transform.stream.StreamResult; | |
import org.apache.poi.hwpf.converter.HtmlDocumentFacade; | |
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |
import org.apache.poi.openxml4j.opc.OPCPackage; | |
import org.apache.poi.ss.usermodel.Cell; | |
import org.apache.poi.ss.usermodel.Row; | |
import org.apache.poi.ss.util.CellRangeAddress; | |
import org.apache.poi.xssf.usermodel.*; | |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCol; | |
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCols; | |
import org.w3c.dom.Document; | |
import org.w3c.dom.Element; | |
public class XlsxConverter { | |
private XSSFWorkbook x; | |
private HtmlDocumentFacade htmlDocumentFacade; | |
private Element page; | |
private StringBuilder css = new StringBuilder(); | |
private XlsxConverter(String filePath) throws IOException, InvalidFormatException, ParserConfigurationException { | |
OPCPackage op = OPCPackage.open(filePath); | |
x = new XSSFWorkbook(op); | |
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); | |
this.htmlDocumentFacade = new HtmlDocumentFacade(document); | |
Element window = htmlDocumentFacade.createBlock(); | |
window.setAttribute("id", "window"); | |
page = htmlDocumentFacade.createBlock(); | |
page.setAttribute("id", "page"); | |
window.appendChild(page); | |
htmlDocumentFacade.getBody().appendChild(window); | |
} | |
public static void main(String[] args) throws InvalidFormatException, IOException, ParserConfigurationException, TransformerException { | |
String name = "xml"; | |
XlsxConverter.convert("Book1.xlsx", "Book1.html"); | |
} | |
/** | |
* | |
* @param filePath | |
* @throws InvalidFormatException | |
* @throws IOException | |
* @throws ParserConfigurationException | |
* @throws TransformerException | |
*/ | |
public static void convert(String filePath, String output) throws InvalidFormatException, IOException, ParserConfigurationException, TransformerException { | |
XlsxConverter converter = new XlsxConverter(filePath); | |
Integer sheetNum = converter.x.getNumberOfSheets(); | |
for (int i = 0; i < sheetNum; i++) { | |
XSSFSheet sheet = converter.x.getSheet(converter.x.getSheetName(i)); | |
String sheetName = converter.x.getSheetName(i); | |
//System.out.println("----starting process sheet : " + sheetName); | |
// add sheet title | |
{ | |
Element title = converter.htmlDocumentFacade.createHeader2(); | |
title.setTextContent(sheetName); | |
converter.page.appendChild(title); | |
} | |
converter.processSheet(converter.page, sheet, "_" + i + "_"); | |
} | |
converter.htmlDocumentFacade.updateStylesheet(); | |
Element style = (Element) converter.htmlDocumentFacade.getDocument().getElementsByTagName("style").item(0); | |
style.setTextContent(converter.css.append(style.getTextContent()).toString()); | |
converter.saveAsHtml(output, converter.htmlDocumentFacade.getDocument()); | |
} | |
private void processSheet(Element container, XSSFSheet sheet, String sID) { | |
Element table = htmlDocumentFacade.createTable(); | |
int sIndex = sheet.getWorkbook().getSheetIndex(sheet); | |
String sId = "sheet_".concat(String.valueOf(sIndex)); | |
table.setAttribute("id", sId); | |
table.setAttribute("border", "1"); | |
table.setAttribute("cellpadding", "2"); | |
table.setAttribute("cellspacing", "0"); | |
table.setAttribute("style", "border-collapse: collapse;"); | |
if (sheet.getDefaultRowHeightInPoints() > 0) { | |
css.append("#").append(sId).append(" tr{height:").append(sheet.getDefaultRowHeightInPoints() / 28.34).append("cm}\n"); | |
} | |
if (sheet.getDefaultColumnWidth() > 0) { | |
css.append("#").append(sId).append(" td{width:").append(sheet.getDefaultColumnWidth() * 0.21).append("cm}\n"); | |
} | |
// cols | |
generateColumns(sheet, table); | |
//rows | |
final short col_num = get_col_max(sheet); | |
final int row_num = sheet.getLastRowNum() + 1; | |
for (int i = 0; i < row_num; i++) { | |
Row row = sheet.getRow(i); | |
processRow(table, (XSSFRow) row, sheet, col_num, sID, i); | |
} | |
container.appendChild(table); | |
} | |
private short get_col_max(XSSFSheet sheet) { | |
short ans = -1; | |
//rows | |
Iterator<Row> rows = sheet.iterator(); | |
while (rows.hasNext()) { | |
Row row = rows.next(); | |
if (row instanceof XSSFRow) { | |
short c = (short) (row.getLastCellNum()); | |
if (ans < c) { | |
ans = c; | |
} | |
} | |
} | |
return ans; | |
} | |
/** | |
* generated | |
* <code><col><code> tags. | |
* | |
* @param sheet | |
* @param table container. | |
*/ | |
private void generateColumns(XSSFSheet sheet, Element table) { | |
List<CTCols> colsList = sheet.getCTWorksheet().getColsList(); | |
MathContext mc = new MathContext(3); | |
for (CTCols cols : colsList) { | |
long oldLevel = 1; | |
for (CTCol col : cols.getColArray()) { | |
while (true) { | |
if (oldLevel == col.getMin()) { | |
break; | |
} | |
Element column = htmlDocumentFacade.createTableColumn(); | |
// htmlDocumentFacade.addStyleClass(column, "col", "width:2cm;"); | |
column.setAttribute("style", "width:2cm;"); | |
table.appendChild(column); | |
oldLevel++; | |
} | |
Element column = htmlDocumentFacade.createTableColumn(); | |
String width = new BigDecimal(sheet.getColumnWidth(Long.bitCount(oldLevel)) / 1440.0, mc).toString(); | |
column.setAttribute("style", "width:".concat(width).concat("cm;")); | |
table.appendChild(column); | |
oldLevel++; | |
} | |
} | |
} | |
private void processRow(Element table, XSSFRow row, XSSFSheet sheet, final int col_num, String sID, int pos_row) { | |
Element tr = htmlDocumentFacade.createTableRow(); | |
if (!(row instanceof XSSFRow)) { | |
for (int pos_col = 0; pos_col < col_num; pos_col++) { | |
processCell(tr, null, sID, pos_col, pos_row); // empty line | |
} | |
} else { | |
if (row.isFormatted()) { | |
//TODO build row style... | |
} | |
if (row.getCTRow().getCustomHeight()) { | |
tr.setAttribute("style", "height:".concat(String.valueOf(row.getHeightInPoints())).concat("pt;")); | |
} | |
for (int pos_col = 0; pos_col < col_num; pos_col++) { | |
Cell cell = row.getCell(pos_col); | |
if (cell instanceof XSSFCell) { | |
processCell(tr, (XSSFCell) cell, sID, pos_col, pos_row); | |
} else { | |
processCell(tr, null, sID, pos_col, pos_row); | |
} | |
} | |
} | |
table.appendChild(tr); | |
} | |
private void processCell(Element tr, XSSFCell cell, String sID, int pos_col, int pos_row) { | |
int cols = 1; | |
int rows = 1; | |
if (cell != null) { | |
if (cell != null) { | |
int num = cell.getSheet().getNumMergedRegions(); | |
// System.out.println(cell.getCTCell()); | |
for (int i = 0; i < num; i++) { | |
CellRangeAddress c = cell.getSheet().getMergedRegion(i); | |
//System.out.println(c.getFirstColumn()); | |
//System.out.println(c.getLastColumn()); | |
//System.out.println(c.getFirstRow()); | |
//System.out.println(c.getLastRow()); | |
//System.out.println(); | |
//System.out.println(cell.getRowIndex()); | |
//System.out.println(cell.getColumnIndex()); | |
//System.out.println("\n\n\n"); | |
// System.out.println(cra); | |
int x0 = c.getFirstColumn(); | |
int x1 = c.getLastColumn(); | |
int y0 = c.getFirstRow(); | |
int y1 = c.getLastRow(); | |
if (x0 == pos_col && y0 == pos_row) { | |
cols = c.getLastColumn() - c.getFirstColumn() + 1; | |
rows = c.getLastRow() - c.getFirstRow() + 1; | |
} else if ((x0 <= pos_col) && (pos_col <= x1) && (y0 <= pos_row) && (pos_row <= y1)) { | |
return; | |
} | |
} | |
} | |
} | |
Element td = htmlDocumentFacade.createTableCell(); | |
if (cols > 1) { | |
td.setAttribute("colspan", "" + cols); | |
} | |
if (rows > 1) { | |
td.setAttribute("rowspan", "" + rows); | |
} | |
Object value; | |
if (cell == null) { | |
// processCellStyle(td, cell.getCellStyle(), null); | |
td.setTextContent("\u00a0"); | |
} else { | |
switch (cell.getCellType()) { | |
case Cell.CELL_TYPE_BLANK: | |
value = "\u00a0"; | |
break; | |
case Cell.CELL_TYPE_NUMERIC: | |
value = cell.getNumericCellValue(); | |
break; | |
case Cell.CELL_TYPE_BOOLEAN: | |
value = cell.getBooleanCellValue(); | |
break; | |
case Cell.CELL_TYPE_FORMULA: | |
value = cell.getNumericCellValue(); | |
break; | |
default: | |
value = cell.getRichStringCellValue(); | |
break; | |
} | |
if (value instanceof XSSFRichTextString) { | |
processCellStyle(td, cell.getCellStyle(), (XSSFRichTextString) value, sID); | |
td.setTextContent(value.toString()); | |
} else { | |
processCellStyle(td, cell.getCellStyle(), null, sID); | |
td.setTextContent(value.toString()); | |
} | |
// String s = value.toString(); | |
// System.out.println(s); | |
} | |
// System.err.println(value); | |
tr.appendChild(td); | |
} | |
private void processCellStyle(Element td, XSSFCellStyle style, XSSFRichTextString rts, String sID) { | |
StringBuilder sb = new StringBuilder(); | |
if (rts != null) { | |
XSSFFont font = rts.getFontOfFormattingRun(1); | |
if (font != null) { | |
sb.append("font-family:").append(font.getFontName()).append(";"); | |
// sb.append("color:").append(font.getColor() ).append(";"); | |
sb.append("font-size:").append(font.getFontHeightInPoints()).append("pt;"); | |
if (font.getXSSFColor() != null) { | |
String color = font.getXSSFColor().getARGBHex().substring(2); | |
sb.append("color:#").append(color).append(";"); | |
} | |
if (font.getItalic()) { | |
sb.append("font-style:italic;"); | |
} | |
if (font.getBold()) { | |
sb.append("font-weight:").append(font.getBoldweight()).append(";"); | |
} | |
if (font.getStrikeout()) { | |
sb.append("text-decoration:underline;"); | |
} | |
} | |
} | |
if (style.getAlignment() != 1) { | |
switch (style.getAlignment()) { | |
case 2: | |
sb.append("text-align:").append("center;"); | |
break; | |
case 3: | |
sb.append("text-align:").append("right;"); | |
break; | |
} | |
} | |
// TODO: set correct value for type and width of border. | |
if (style.getBorderBottom() != 0) { | |
sb.append("border-bottom:solid; ").append(style.getBorderBottom()).append("px;"); | |
} | |
if (style.getBorderLeft() != 0) { | |
sb.append("border-left:solid; ").append(style.getBorderLeft()).append("px;"); | |
} | |
if (style.getBorderTop() != 0) { | |
sb.append("border-top:solid; ").append(style.getBorderTop()).append("px;"); | |
} | |
if (style.getBorderRight() != 0) { | |
sb.append("border-right:solid; ").append(style.getBorderRight()).append("px;"); | |
} | |
//if (style.getFillBackgroundXSSFColor() != null) { | |
// XSSFColor color = style.getFillBackgroundXSSFColor(); | |
//} | |
// System.out.println(style.getFillBackgroundXSSFColor()); | |
if (style.getFillBackgroundXSSFColor() != null) { | |
sb.append("background:#fcc;"); | |
} | |
// System.out.println(sb.toString()); | |
htmlDocumentFacade.addStyleClass(td, "td" + sID, sb.toString()); | |
} | |
/** | |
* @param output | |
* @param document | |
* @throws IOException | |
* @throws TransformerException | |
*/ | |
private void saveAsHtml(String output, org.w3c.dom.Document document) throws IOException, TransformerException { | |
// check path | |
File folder = new File(getFilePath(output)); | |
if (!folder.canRead()) { | |
folder.mkdirs(); | |
} | |
// FileWriter out = new FileWriter(output); | |
FileOutputStream fos = new FileOutputStream(output); | |
DOMSource domSource = new DOMSource(document); | |
StreamResult result = new StreamResult(new StringWriter()); | |
//StreamResult streamResult = new StreamResult(out); | |
TransformerFactory tf = TransformerFactory.newInstance(); | |
Transformer serializer = tf.newTransformer(); | |
// TODO set encoding from a command argument | |
// インデントを行う | |
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); | |
serializer.setOutputProperty(OutputKeys.INDENT, "yes"); | |
serializer.setOutputProperty(OutputPropertiesFactory.S_KEY_INDENT_AMOUNT, "2"); | |
serializer.setOutputProperty(OutputKeys.METHOD, "html"); | |
//serializer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, ""); | |
serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); | |
serializer.transform(domSource, result); | |
String s = "<!DOCTYPE html>\n" + result.getWriter().toString(); | |
OutputStreamWriter out = new OutputStreamWriter(fos, "UTF-8"); | |
out.write(s); | |
out.close(); | |
} | |
public String getFilePath(String fileFullPath) { | |
int sep = fileFullPath.lastIndexOf("\\") == -1 ? fileFullPath.lastIndexOf("/") : fileFullPath.lastIndexOf("\\"); | |
return fileFullPath.substring(0, sep + 1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment