Skip to content

Instantly share code, notes, and snippets.

@aaronanderson
Last active November 24, 2022 13:47
Show Gist options
  • Save aaronanderson/e370f2bbe45b4c38e59ab73d47a9b43c to your computer and use it in GitHub Desktop.
Save aaronanderson/e370f2bbe45b4c38e59ab73d47a9b43c to your computer and use it in GitHub Desktop.
Apache POI Excel SAX Handler for large Excel files
//Example handler, in this case Excel rows are stored into an Apache Ignite cache
public class MappingSheetHandler extends SheetHandler {
IgniteCache<BinaryObject, BinaryObject> mappingCache;
protected Map<String, String> headerRowMapping = new HashedMap<>();
protected LocationSheetHandler() {
mappingCache = ignite.cache("mappings").withKeepBinary();
}
@Override
protected boolean processSheet() {
return "Sheet 1".equals(sheetName);
}
@Override
protected void startSheet() {
log.info(String.format("********************** Processing Mapping Sheet - %s ***************************", sheetName));
}
@Override
protected void endSheet() {
log.info(String.format("********************** Processed Mapping Sheet - %s ***************************", sheetName));
}
@Override
protected void processRow() {
if (rowNumber > 1 && !rowValues.isEmpty()) {
String oldID = rowValues.get("A");
String newID = rowValues.get("B");
if (newID == null) {
return;
}
String color = "white";
CTXf style = rowStyles.get("A");
if (style.getApplyFill()) {
XSSFCellFill fill = stylesTable.getFillAt((int) style.getFillId());
if ("FFFFFF00".contentEquals(fill.getFillForegroundColor().getARGBHex())) {
color = "yellow";
}
}
BinaryObjectBuilder keyBuilder = ignite.binary().builder("Key");
keyBuilder.setField("srtype", "SomeType");
keyBuilder.setField("srid", oldID);
keyBuilder.setField("color", color);
BinaryObjectBuilder valBuilder = ignite.binary().builder("Value");
valBuilder.setField("trtype", "SomeType");
valBuilder.setField("trid", newID);
mappingCache.put(keyBuilder.build(), valBuilder.build());
}
}
}
}
import java.io.InputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTXf;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
//Used to handle large Excel files where the full object model doesn't fit into the JVM heap so stream it instead. Based of off this example - https://dzone.com/articles/introduction-to-apache-poi-library
public class SheetHandler extends DefaultHandler {
private SharedStringsTable sharedStringsTable;
private String contents;
private boolean isCellValue;
private boolean fromSST;
protected String sheetName;
protected int sheetNumber = 0;
protected Map<String, String> header = new HashMap();
protected Map<String, String> rowValues = new HashMap();
protected StylesTable stylesTable;
protected Map<String, CTXf> rowStyles = new HashMap();
protected long rowNumber = 0;
protected String cellId;
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
// Clear contents cache
contents = "";
// element row represents Row
if (name.equals("row")) {
String rowNumStr = attributes.getValue("r");
//System.out.println("Row# " + rowNumStr);
rowNumber = Long.parseLong(rowNumStr);
}
// element c represents Cell
else if (name.equals("c")) {
cellId = getColumnId(attributes.getValue("r"));
// attribute r represents the cell reference
//System.out.print(attributes.getValue("r") + " - ");
// attribute t represents the cell type
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
// cell type s means value will be extracted from SharedStringsTable
fromSST = true;
}
String cellStyleStr = attributes.getValue("s");
if (cellStyleStr != null) {
int cellStyleInt = Integer.parseInt(cellStyleStr);
CTXf cellStyle = stylesTable.getCellXfAt(cellStyleInt);
rowStyles.put(cellId, cellStyle);
}
// element v represents value of Cell
} else if (name.equals("v")) {
isCellValue = true;
}
}
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
if (isCellValue) {
contents += new String(ch, start, length);
}
}
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
if (isCellValue && fromSST) {
int index = Integer.parseInt(contents);
contents = new XSSFRichTextString(sharedStringsTable.getItemAt(index).getString()).toString();
//System.out.format("%d %s %s\n", rowNumber, cellId, contents);
rowValues.put(cellId, contents);
cellId = null;
isCellValue = false;
fromSST = false;
} else if (isCellValue) {
rowValues.put(cellId, contents);
isCellValue = false;
} else if (name.equals("row")) {
if (rowNumber == 1) {
for (Map.Entry<String, String> row : rowValues.entrySet()) {
header.put(row.getValue(), row.getKey());
}
}
processRow();
rowValues.clear();
}
}
protected boolean processSheet() {
return true;
}
protected void startSheet() {
}
protected void endSheet() {
}
protected void processRow() {
}
protected static String getColumnId(String attribute) throws SAXException {
for (int i = 0; i < attribute.length(); i++) {
if (!Character.isAlphabetic(attribute.charAt(i))) {
return attribute.substring(0, i);
}
}
throw new SAXException("Invalid format " + attribute);
}
public void readExcelFile(String filename) throws Exception {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
try (OPCPackage opcPackage = OPCPackage.open(filename);) {
XSSFReader xssfReader = new XSSFReader(opcPackage);
sharedStringsTable = xssfReader.getSharedStringsTable();
stylesTable = xssfReader.getStylesTable();
ContentHandler handler = this;
Iterator<InputStream> sheets = xssfReader.getSheetsData();
if (sheets instanceof XSSFReader.SheetIterator) {
XSSFReader.SheetIterator sheetiterator = (XSSFReader.SheetIterator) sheets;
while (sheetiterator.hasNext()) {
try (InputStream sheet = sheetiterator.next();) {
sheetName = sheetiterator.getSheetName();
sheetNumber++;
if (!processSheet()) {
continue;
}
startSheet();
saxParser.parse(sheet, (DefaultHandler) handler);
endSheet();
}
}
}
}
}
}
public class Test {
public static void main (String ... args) throws Exception {
MappingSheetHandler handler = new MappingSheetHandler();
handler.readExcelFile("Test.xlsx");
}
}
@RajeshkumarRanaTR
Copy link

Can we use sax for writing or exporting a big excel file?

@DatDC
Copy link

DatDC commented Nov 24, 2022

Can we use sax for writing or exporting a big excel file?

Yeb, But how can you read multiple files and multi request at once without writing the wrong data? I'm rewriting the function based on your sample and am having this problem. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment