Skip to content

Instantly share code, notes, and snippets.

@satendrakumar
Created August 15, 2017 14:27
Show Gist options
  • Save satendrakumar/0214ba32e0e12ae548232b8566418cd1 to your computer and use it in GitHub Desktop.
Save satendrakumar/0214ba32e0e12ae548232b8566418cd1 to your computer and use it in GitHub Desktop.
Convert excel file into csv file
import org.apache.http.annotation.NotThreadSafe
import org.apache.poi.ss.usermodel._
import scala.collection.JavaConversions._
import scala.util.control.NonFatal
object IXLS2CSV {
private val df = new DataFormatter(true)
@NotThreadSafe
def convert(xlsPath: String): List[String] = {
val file = new File(xlsPath)
val parentFolder = file.getParent
val fileName = file.getName.split("\\.")(0)
val workbook = WorkbookFactory.create(file)
val formulaEvaluator: FormulaEvaluator = workbook.getCreationHelper().createFormulaEvaluator()
val outputFilePaths = workbook.zipWithIndex.map { case (sheet, index) =>
val outputPath = Option(parentFolder).fold(s"""$fileName-$index.csv""")(folder => s"""$folder/$fileName-$index.csv""")
convert(sheet, outputPath, formulaEvaluator)
}.toList
workbook.close()
outputFilePaths
}
private def convert(sheet: Sheet, outputPath: String, formulaEvaluator: FormulaEvaluator): String = {
var headersLength = 0
var isHeadersWritten = false
val csvWriter = new CSVWriter(outputPath)
println("Start CSV writing......... ")
sheet.foreach { case row =>
if (Option(row).isDefined) {
try {
val rowData = (0 until row.getLastCellNum).toArray
.map { index => removeLineBreakingChars(df.formatCellValue(row.getCell(index, Row.CREATE_NULL_AS_BLANK), formulaEvaluator)) }
if (isHeadersWritten) {
if (headersLength == rowData.length) {
println(s"writing row number ${row.getRowNum}")
csvWriter.write(rowData)
} else {
println(s"Invalid row [Row no - ${row.getRowNum}] [Headers count = $headersLength and current row columns count = ${rowData.length}]")
}
} else {
csvWriter.writeHeaders(rowData)
isHeadersWritten = true
headersLength = rowData.length
}
} catch {
case NonFatal(th) =>
th.printStackTrace()
}
}
}
csvWriter.close()
outputPath
}
private def removeLineBreakingChars(cell: String): String = cell.replaceAll("[\\t\\n\\r]", " ")
}
//CSVWriter
import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}
class CSVWriter(path: String) {
private val writer = new BufferedWriter((new OutputStreamWriter((new FileOutputStream(path)))))
private val csvSettings = new CsvWriterSettings()
csvSettings.getFormat.setDelimiter('\t')
csvSettings.getFormat.setQuoteEscape('\\')
csvSettings.setQuoteAllFields(true)
private val csvWriter = new CsvWriter(writer, csvSettings)
def writeHeaders(headers: Array[String]): Unit = csvWriter.writeHeaders(headers: _ *)
def write(row: Array[String]): Unit =
csvWriter.writeRow(row)
def close(): Unit = {
csvWriter.flush()
csvWriter.close()
}
}
//dependencies
"org.apache.poi" % "poi" % "3.15"
"org.apache.poi" % "poi-ooxml" % "3.15"
"com.univocity" % "univocity-parsers" % "2.4.1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment