Created
May 9, 2016 13:54
-
-
Save milindjagre/34966d289da2e6d33dfbf0f76fc75271 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* To change this license header, choose License Headers in Project Properties. | |
* To change this template file, choose Tools | Templates | |
* and open the template in the editor. | |
*/ | |
package com.milind.mr.excel; | |
/** | |
* | |
* @author milind | |
*/ | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.util.Iterator; | |
import org.apache.commons.logging.Log; | |
import org.apache.commons.logging.LogFactory; | |
import org.apache.poi.hssf.usermodel.HSSFSheet; | |
import org.apache.poi.hssf.usermodel.HSSFWorkbook; | |
import org.apache.poi.ss.usermodel.Cell; | |
import org.apache.poi.ss.usermodel.Row; | |
public class ExcelParser { | |
private static final Log LOG = LogFactory.getLog(ExcelParser.class); | |
private StringBuilder currentString = null; | |
private long bytesRead = 0; | |
public String parseExcelData(InputStream is) { | |
try { | |
HSSFWorkbook workbook = new HSSFWorkbook(is); | |
// Taking first sheet from the workbook | |
HSSFSheet sheet = workbook.getSheetAt(0); | |
// Iterate through each rows from first sheet | |
Iterator<Row> rowIterator = sheet.iterator(); | |
currentString = new StringBuilder(); | |
while (rowIterator.hasNext()) { | |
Row row = rowIterator.next(); | |
// For each row, iterate through each columns | |
Iterator<Cell> cellIterator = row.cellIterator(); | |
while (cellIterator.hasNext()) { | |
Cell cell = cellIterator.next(); | |
switch (cell.getCellType()) { | |
case Cell.CELL_TYPE_BOOLEAN: | |
bytesRead++; | |
currentString.append(cell.getBooleanCellValue() + "\t"); | |
break; | |
case Cell.CELL_TYPE_NUMERIC: | |
bytesRead++; | |
currentString.append(cell.getNumericCellValue() + "\t"); | |
break; | |
case Cell.CELL_TYPE_STRING: | |
bytesRead++; | |
currentString.append(cell.getStringCellValue() + "\t"); | |
break; | |
} | |
} | |
currentString.append("\n"); | |
} | |
is.close(); | |
} catch (IOException e) { | |
LOG.error("IO Exception : File not found " + e); | |
} | |
return currentString.toString(); | |
} | |
public long getBytesRead() { | |
return bytesRead; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A custom parser class used when reading Excel files with the MapReduce API.