Skip to content

Instantly share code, notes, and snippets.

@drmalex07
Last active February 28, 2021 12:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drmalex07/e424c0db62e4d6112ae781d56cbad255 to your computer and use it in GitHub Desktop.
Save drmalex07/e424c0db62e4d6112ae781d56cbad255 to your computer and use it in GitHub Desktop.
A basic spreadsheet reader in Java. #xlsx #xls #spreadsheet #excel
package com.example.hello_spreadsheet;
import java.io.Serializable;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import org.apache.commons.collections4.IteratorUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
public abstract class AbstractSpreadsheetReader<T extends Serializable> implements SpreadsheetReader<T>
{
private static final ObjectMapper objectMapper = new ObjectMapper();
static {
objectMapper.registerModule(new JavaTimeModule());
}
private static final DateFormat simpleDateFormatter = new SimpleDateFormat("dd-MM-yyyy");
static {
simpleDateFormatter.setTimeZone(TimeZone.getDefault());
}
/**
* Does this row have meaningful data, or is it to be ignored?
*/
protected boolean hasData(Row row)
{
return row.getPhysicalNumberOfCells() > 0;
}
/**
* Does this row mark the end of data?
*/
protected boolean isEndOfData(Row row)
{
return false;
}
/**
* Return a list of expected field names (in the order encountered in the spreadsheet)
* @return a list of names, or <tt>null</tt> to make the reader infer names from header row.
*/
protected List<String> fieldNames()
{
return null;
}
private class RecordIterator implements Iterator<T>
{
final XSSFSheet spreadsheet;
final int startIndex, endIndex;
final List<String> fieldNames;
final List<String> headerNames;
final Iterator<Row> rowIterator;
Row row;
RecordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex)
{
this.spreadsheet = spreadsheet;
this.startIndex = startIndex;
this.endIndex = endIndex;
this.rowIterator = spreadsheet.rowIterator();
this.row = null;
this.moveToFirstRow();
if (hasHeaders) {
this.headerNames = IteratorUtils.toList(
IteratorUtils.transformedIterator(row.cellIterator(), Object::toString));
moveToNextRow();
} else {
this.headerNames = null;
}
final List<String> names = fieldNames();
this.fieldNames = names != null? names : this.headerNames;
if (this.fieldNames == null) {
throw new IllegalStateException("The field names are unknown: " +
"Either provide a fieldNames method, or use headers from the spreadsheet");
}
}
void moveToFirstRow()
{
int rowIndex = -1;
while (rowIndex < startIndex && rowIterator.hasNext()) {
row = rowIterator.next();
rowIndex = row.getRowNum();
}
if (rowIndex < startIndex) {
row = null;
}
}
void moveToNextRow()
{
if (this.row == null)
return;
Row nextRow = null;
while (nextRow == null && this.rowIterator.hasNext()) {
nextRow = this.rowIterator.next();
if (!hasData(nextRow)) {
nextRow = null;
continue;
}
if (nextRow.getRowNum() >= this.endIndex || isEndOfData(nextRow)) {
nextRow = null;
break;
}
}
this.row = nextRow;
}
T rowToRecord(Row row)
{
return objectMapper.convertValue(rowToMap(row), recordType());
};
Map<String, Object> rowToMap(Row row)
{
final Map<String, Object> map = new HashMap<>();
final Iterator<Cell> cellIterator = row.cellIterator();
while (cellIterator.hasNext()) {
final Cell cell = cellIterator.next();
final CellType cellType = cell.getCellTypeEnum();
final int j = cell.getColumnIndex();
if (j >= fieldNames.size()) {
break;
}
final String fieldName = fieldNames.get(j);
if (fieldName == null) {
continue;
}
Object value = null;
switch (cellType) {
case BLANK:
break;
case NUMERIC:
case FORMULA:
if (DateUtil.isCellDateFormatted(cell)) {
value = simpleDateFormatter.format(cell.getDateCellValue());
} else {
value = Double.valueOf(cell.getNumericCellValue());
}
break;
case BOOLEAN:
value = Boolean.valueOf(cell.getBooleanCellValue());
break;
case STRING:
value = cell.getStringCellValue();
break;
default:
throw new IllegalStateException("Encountered a cell of type [" + cellType +"]");
}
map.put(fieldName, value);
}
return map;
}
@Override
public boolean hasNext()
{
return this.row != null;
}
@Override
public T next()
{
final T record = rowToRecord(this.row);
moveToNextRow();
return record;
}
}
@Override
public Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex)
{
return new RecordIterator(spreadsheet, hasHeaders, startIndex, endIndex);
}
}
package com.example.hello_spreadsheet;
import java.io.Serializable;
import java.util.Iterator;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.poi.xssf.usermodel.XSSFSheet;
public interface SpreadsheetReader <T extends Serializable>
{
Class<T> recordType();
Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex);
default Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex)
{
return recordIterator(spreadsheet, hasHeaders, startIndex, Integer.MAX_VALUE);
}
default Stream<T> recordStream(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex)
{
Iterator<T> it = recordIterator(spreadsheet, hasHeaders, startIndex, endIndex);
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(it, Spliterator.IMMUTABLE), false);
}
default Stream<T> recordStream(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex)
{
return recordStream(spreadsheet, hasHeaders, startIndex, Integer.MAX_VALUE);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment