Created
July 2, 2015 02:15
-
-
Save ethanal/6a711b0bbf479a9f3f87 to your computer and use it in GitHub Desktop.
Excel Converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.util.Arrays; | |
import java.util.HashMap; | |
import org.apache.poi.xssf.usermodel.XSSFWorkbook; | |
import org.apache.poi.ss.usermodel.Sheet; | |
import org.apache.poi.ss.usermodel.Workbook; | |
import org.apache.poi.hssf.usermodel.HSSFWorkbook; | |
import org.apache.poi.ss.usermodel.Cell; | |
import org.apache.poi.ss.usermodel.Row; | |
import org.apache.poi.ss.util.CellRangeAddress; | |
import static org.apache.commons.lang3.StringEscapeUtils.escapeHtml4; | |
class Coordinate { | |
private final int row; | |
private final int col; | |
public Coordinate(int row, int col) { | |
this.row = row; | |
this.col = col; | |
} | |
@Override | |
public boolean equals(Object other) { | |
if (this == other) | |
return true; | |
if (!(other instanceof Coordinate)) | |
return false; | |
Coordinate c = (Coordinate) other; | |
return row == c.row && col == c.col; | |
} | |
@Override | |
public int hashCode() { | |
return (row + "," + col).hashCode(); | |
} | |
} | |
abstract class ExcelFormatConverter { | |
protected abstract String escape(String value); | |
protected abstract String wrapCell(String component); | |
protected abstract String componentSeparator(); | |
protected abstract String rowBeginning(); | |
protected abstract String rowEnding(); | |
protected abstract String fileBeginning(); | |
protected abstract String fileEnding(); | |
protected abstract String fileExtension(); | |
protected String cellToString(Cell cell) { | |
int type = cell.getCellType(); | |
if (type == Cell.CELL_TYPE_FORMULA) | |
type = cell.getCachedFormulaResultType(); | |
String ret; | |
switch (type) { | |
case Cell.CELL_TYPE_BOOLEAN: | |
ret = this.escape(String.valueOf(cell.getBooleanCellValue())); | |
break; | |
case Cell.CELL_TYPE_NUMERIC: | |
ret = this.escape(String.valueOf(cell.getNumericCellValue())); | |
break; | |
case Cell.CELL_TYPE_STRING: | |
ret = this.escape(String.valueOf(cell.getStringCellValue())); | |
break; | |
case Cell.CELL_TYPE_BLANK: | |
case Cell.CELL_TYPE_ERROR: | |
ret = ""; | |
break; | |
default: | |
ret = this.escape(String.valueOf(cell)); | |
break; | |
} | |
return ret; | |
} | |
public void convert(File inFile) throws IOException { | |
String inFileName = inFile.getName(); | |
String extension = inFileName.substring(inFileName.lastIndexOf('.') + 1); | |
FileInputStream fis = new FileInputStream(inFile); | |
Workbook wb; | |
if (extension.equals("xls")) { | |
wb = new HSSFWorkbook(fis); | |
} else { | |
assert extension.equals("xlsx"); | |
wb = new XSSFWorkbook(fis); | |
} | |
int numSheets = wb.getNumberOfSheets(); | |
for (int sheetIndex = 0; sheetIndex < numSheets; ++sheetIndex) { | |
HashMap<Coordinate, String> cells = new HashMap<Coordinate, String>(); | |
StringBuffer outputBuffer = new StringBuffer(); | |
Sheet sheet = wb.getSheetAt(sheetIndex); | |
int minRow = Integer.MAX_VALUE; | |
int maxRow = -1; | |
int minCol = Integer.MAX_VALUE; | |
int maxCol = -1; | |
for (Row row : sheet) { | |
for (Cell cell : row) { | |
int r = cell.getRowIndex(); | |
int c = cell.getColumnIndex(); | |
Coordinate coord = new Coordinate(r, c); | |
String component = this.cellToString(cell); | |
if (component != "") { | |
cells.put(coord, component); | |
minRow = Math.min(minRow, r); | |
maxRow = Math.max(maxRow, r); | |
minCol = Math.min(minCol, c); | |
maxCol = Math.max(maxCol, c); | |
} | |
} | |
} | |
int numMergedRegions = sheet.getNumMergedRegions(); | |
for (int i = 0; i < numMergedRegions; ++i) { | |
CellRangeAddress region = sheet.getMergedRegion(i); | |
String val = cells.get(new Coordinate(region.getFirstRow(), region.getFirstColumn())); | |
for (int r = region.getFirstRow(); r <= region.getLastRow(); ++r) { | |
for (int c = region.getFirstColumn(); c <= region.getLastColumn(); ++c) { | |
cells.put(new Coordinate(r, c), val); | |
} | |
} | |
} | |
outputBuffer.append(this.fileBeginning()); | |
for (int r = minRow; r <= maxRow; ++r) { | |
outputBuffer.append(this.rowBeginning()); | |
for (int c = minCol; c <= maxCol; ++c) { | |
String component = cells.get(new Coordinate(r, c)); | |
if (component == null) { | |
component = ""; | |
} | |
component = this.wrapCell(component); | |
outputBuffer.append(component); | |
if (c != maxCol) { | |
outputBuffer.append(this.componentSeparator()); | |
} | |
} | |
outputBuffer.append(this.rowEnding()); | |
} | |
outputBuffer.append(this.fileEnding()); | |
int sheetNumber = sheetIndex + 1; | |
String outPath = inFile.getAbsoluteFile().getParent() + "/" + inFileName + "_sheet" + sheetNumber + "." + this.fileExtension(); | |
System.out.println(outPath); | |
FileOutputStream fos = new FileOutputStream(new File(outPath)); | |
fos.write(outputBuffer.toString().getBytes()); | |
fos.close(); | |
} | |
wb.close(); | |
} | |
} | |
class ExcelToCSVConverter extends ExcelFormatConverter { | |
@Override | |
protected String escape(String val) { | |
return val.replace("\"", "\"\""); | |
} | |
@Override | |
protected String wrapCell(String component) { | |
return "\"" + component + "\""; | |
} | |
@Override | |
protected String componentSeparator() { | |
return ","; | |
} | |
@Override | |
protected String rowBeginning() { | |
return ""; | |
} | |
@Override | |
protected String rowEnding() { | |
return "\n"; | |
} | |
@Override | |
protected String fileBeginning() { | |
return ""; | |
} | |
@Override | |
protected String fileEnding() { | |
return ""; | |
} | |
@Override | |
protected String fileExtension() { | |
return "csv"; | |
} | |
} | |
class ExcelToHTMLConverter extends ExcelFormatConverter { | |
@Override | |
protected String escape(String val) { | |
return escapeHtml4(val); | |
} | |
@Override | |
protected String wrapCell(String component) { | |
return "<td>" + component + "</td>"; | |
} | |
@Override | |
protected String componentSeparator() { | |
return ""; | |
} | |
@Override | |
protected String rowBeginning() { | |
return "<tr>"; | |
} | |
@Override | |
protected String rowEnding() { | |
return "</tr>\n"; | |
} | |
@Override | |
protected String fileBeginning() { | |
String style = "<style type='text/css'>table {border-collapse: collapse;} table td {border: 1px solid #999; padding: 4px;}</style>"; | |
return "<html>\n" + style + "<body>\n<table>\n"; | |
} | |
@Override | |
protected String fileEnding() { | |
return "</table>\n</body>\n</html>"; | |
} | |
@Override | |
protected String fileExtension() { | |
return "html"; | |
} | |
} | |
public class ExcelConverter { | |
public static void main(String[] args) throws IOException { | |
if (!(args.length == 2 && (args[0].equals("csv") || args[0].equals("html")))) { | |
System.out.println("Invalid Arguments"); | |
System.exit(1); | |
} | |
String fname = args[1]; | |
File in = new File(fname); | |
ExcelFormatConverter converter; | |
if (args[0].equals("csv")) | |
converter = new ExcelToCSVConverter(); | |
else | |
converter = new ExcelToHTMLConverter(); | |
converter.convert(in); | |
System.exit(1); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment