Last active
February 28, 2021 12:31
-
-
Save drmalex07/e424c0db62e4d6112ae781d56cbad255 to your computer and use it in GitHub Desktop.
A basic spreadsheet reader in Java. #xlsx #xls #spreadsheet #excel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.hello_spreadsheet; | |
import java.io.Serializable; | |
import java.text.DateFormat; | |
import java.text.SimpleDateFormat; | |
import java.util.HashMap; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.TimeZone; | |
import org.apache.commons.collections4.IteratorUtils; | |
import org.apache.poi.ss.usermodel.Cell; | |
import org.apache.poi.ss.usermodel.CellType; | |
import org.apache.poi.ss.usermodel.DateUtil; | |
import org.apache.poi.ss.usermodel.Row; | |
import org.apache.poi.xssf.usermodel.XSSFSheet; | |
import com.fasterxml.jackson.databind.ObjectMapper; | |
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; | |
public abstract class AbstractSpreadsheetReader<T extends Serializable> implements SpreadsheetReader<T> | |
{ | |
private static final ObjectMapper objectMapper = new ObjectMapper(); | |
static { | |
objectMapper.registerModule(new JavaTimeModule()); | |
} | |
private static final DateFormat simpleDateFormatter = new SimpleDateFormat("dd-MM-yyyy"); | |
static { | |
simpleDateFormatter.setTimeZone(TimeZone.getDefault()); | |
} | |
/** | |
* Does this row have meaningful data, or is it to be ignored? | |
*/ | |
protected boolean hasData(Row row) | |
{ | |
return row.getPhysicalNumberOfCells() > 0; | |
} | |
/** | |
* Does this row mark the end of data? | |
*/ | |
protected boolean isEndOfData(Row row) | |
{ | |
return false; | |
} | |
/** | |
* Return a list of expected field names (in the order encountered in the spreadsheet) | |
* @return a list of names, or <tt>null</tt> to make the reader infer names from header row. | |
*/ | |
protected List<String> fieldNames() | |
{ | |
return null; | |
} | |
private class RecordIterator implements Iterator<T> | |
{ | |
final XSSFSheet spreadsheet; | |
final int startIndex, endIndex; | |
final List<String> fieldNames; | |
final List<String> headerNames; | |
final Iterator<Row> rowIterator; | |
Row row; | |
RecordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex) | |
{ | |
this.spreadsheet = spreadsheet; | |
this.startIndex = startIndex; | |
this.endIndex = endIndex; | |
this.rowIterator = spreadsheet.rowIterator(); | |
this.row = null; | |
this.moveToFirstRow(); | |
if (hasHeaders) { | |
this.headerNames = IteratorUtils.toList( | |
IteratorUtils.transformedIterator(row.cellIterator(), Object::toString)); | |
moveToNextRow(); | |
} else { | |
this.headerNames = null; | |
} | |
final List<String> names = fieldNames(); | |
this.fieldNames = names != null? names : this.headerNames; | |
if (this.fieldNames == null) { | |
throw new IllegalStateException("The field names are unknown: " + | |
"Either provide a fieldNames method, or use headers from the spreadsheet"); | |
} | |
} | |
void moveToFirstRow() | |
{ | |
int rowIndex = -1; | |
while (rowIndex < startIndex && rowIterator.hasNext()) { | |
row = rowIterator.next(); | |
rowIndex = row.getRowNum(); | |
} | |
if (rowIndex < startIndex) { | |
row = null; | |
} | |
} | |
void moveToNextRow() | |
{ | |
if (this.row == null) | |
return; | |
Row nextRow = null; | |
while (nextRow == null && this.rowIterator.hasNext()) { | |
nextRow = this.rowIterator.next(); | |
if (!hasData(nextRow)) { | |
nextRow = null; | |
continue; | |
} | |
if (nextRow.getRowNum() >= this.endIndex || isEndOfData(nextRow)) { | |
nextRow = null; | |
break; | |
} | |
} | |
this.row = nextRow; | |
} | |
T rowToRecord(Row row) | |
{ | |
return objectMapper.convertValue(rowToMap(row), recordType()); | |
}; | |
Map<String, Object> rowToMap(Row row) | |
{ | |
final Map<String, Object> map = new HashMap<>(); | |
final Iterator<Cell> cellIterator = row.cellIterator(); | |
while (cellIterator.hasNext()) { | |
final Cell cell = cellIterator.next(); | |
final CellType cellType = cell.getCellTypeEnum(); | |
final int j = cell.getColumnIndex(); | |
if (j >= fieldNames.size()) { | |
break; | |
} | |
final String fieldName = fieldNames.get(j); | |
if (fieldName == null) { | |
continue; | |
} | |
Object value = null; | |
switch (cellType) { | |
case BLANK: | |
break; | |
case NUMERIC: | |
case FORMULA: | |
if (DateUtil.isCellDateFormatted(cell)) { | |
value = simpleDateFormatter.format(cell.getDateCellValue()); | |
} else { | |
value = Double.valueOf(cell.getNumericCellValue()); | |
} | |
break; | |
case BOOLEAN: | |
value = Boolean.valueOf(cell.getBooleanCellValue()); | |
break; | |
case STRING: | |
value = cell.getStringCellValue(); | |
break; | |
default: | |
throw new IllegalStateException("Encountered a cell of type [" + cellType +"]"); | |
} | |
map.put(fieldName, value); | |
} | |
return map; | |
} | |
@Override | |
public boolean hasNext() | |
{ | |
return this.row != null; | |
} | |
@Override | |
public T next() | |
{ | |
final T record = rowToRecord(this.row); | |
moveToNextRow(); | |
return record; | |
} | |
} | |
@Override | |
public Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex) | |
{ | |
return new RecordIterator(spreadsheet, hasHeaders, startIndex, endIndex); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.example.hello_spreadsheet; | |
import java.io.Serializable; | |
import java.util.Iterator; | |
import java.util.Spliterator; | |
import java.util.Spliterators; | |
import java.util.stream.Stream; | |
import java.util.stream.StreamSupport; | |
import org.apache.poi.xssf.usermodel.XSSFSheet; | |
public interface SpreadsheetReader <T extends Serializable> | |
{ | |
Class<T> recordType(); | |
Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex); | |
default Iterator<T> recordIterator(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex) | |
{ | |
return recordIterator(spreadsheet, hasHeaders, startIndex, Integer.MAX_VALUE); | |
} | |
default Stream<T> recordStream(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex, int endIndex) | |
{ | |
Iterator<T> it = recordIterator(spreadsheet, hasHeaders, startIndex, endIndex); | |
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(it, Spliterator.IMMUTABLE), false); | |
} | |
default Stream<T> recordStream(XSSFSheet spreadsheet, boolean hasHeaders, int startIndex) | |
{ | |
return recordStream(spreadsheet, hasHeaders, startIndex, Integer.MAX_VALUE); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment