Skip to content

Instantly share code, notes, and snippets.

@jeiea
Last active November 3, 2018 20:04
Show Gist options
  • Save jeiea/9185e2676673c268d68994108ba52e47 to your computer and use it in GitHub Desktop.
Save jeiea/9185e2676673c268d68994108ba52e47 to your computer and use it in GitHub Desktop.
For those for whom file.readText().lines().map { it.split('\t') } is not sufficient
import java.io.Reader
/**
 * Portable TSV reader supporting quoted, multiline cells.
 *
 * Usage: `File("a.tsv").bufferedReader().use { TsvReader(it).readAll() }`
 *
 * Rules implemented by this class:
 * - Cells are separated by a tab; a row ends with `\n`, `\r` or `\r\n`.
 * - A cell that starts with `"` is quoted: it may contain tabs and line breaks, and `""` inside it
 *   stands for one literal `"`. Text that follows the closing quote is appended verbatim.
 * - A quoted cell that is still open at end of input is returned with whatever was read so far.
 * - Empty cells are preserved (`a<TAB><TAB>b` has three cells); a blank line yields an empty row.
 *
 * The first character is read from [reader] when the instance is constructed. This class never
 * closes [reader]. End of input is represented internally by the character U+FFFF, so an input
 * that really contains U+FFFF is cut off at that character.
 */
class TsvReader(private val reader: Reader) {
    // Scratch buffer shared by the cell readers; always emptied through resetBuilder().
    private val sb = StringBuilder()

    // Reader.read() returns -1 at end of input; converted to Char it becomes U+FFFF.
    private val eof = (-1).toChar()

    // One character of lookahead.
    private var character = reader.read().toChar()

    /** Returns the next character without consuming it. */
    private fun peek(): Char = character

    /** Consumes and returns the next character, refilling the lookahead. */
    private fun read(): Char {
        val ch = character
        character = reader.read().toChar()
        return ch
    }

    /**
     * Reads every remaining row from the underlying reader.
     *
     * @return the rows in input order, each row being the list of its cells. A line break that
     * terminates the last row does not produce an additional empty row.
     */
    fun readAll(): List<List<String>> {
        val rows = mutableListOf<List<String>>()
        while (peek() != eof) {
            rows.add(readRow())
        }
        return rows
    }

    /** Reads one row including its line terminator. Must not be called at end of input. */
    private fun readRow(): List<String> {
        val row = mutableListOf<String>()
        // A line that ends immediately is a blank line and has no cells at all.
        if (peek() != '\r' && peek() != '\n') {
            row.add(readCell())
            // Every tab announces exactly one more cell, even when that cell is empty.
            while (peek() == '\t') {
                read()
                row.add(readCell())
            }
        }
        consumeLineEnd()
        return row
    }

    /** Reads one cell and stops in front of the tab, line break or end of input that follows it. */
    private fun readCell(): String =
        if (peek() == '"') {
            read() // opening quote
            readEscapedWithInvalids()
        } else {
            readUntilTab()
        }

    /** Consumes `\r\n`, `\r` or `\n` if present; does nothing otherwise (end of input). */
    private fun consumeLineEnd() {
        when (peek()) {
            '\r' -> {
                read()
                if (peek() == '\n') {
                    read()
                }
            }
            '\n' -> read()
        }
    }

    /**
     * Reads a quoted cell whose opening quote has already been consumed, plus any stray text
     * between the closing quote and the next separator.
     */
    private fun readEscapedWithInvalids(): String {
        val valid = readEscaped()
        return when (peek()) {
            '\t', '\r', '\n', eof -> valid
            else -> valid + readUntilTab()
        }
    }

    /** Reads up to and including the closing quote, or up to end of input if there is none. */
    private fun readEscaped(): String {
        while (true) {
            val ch = read()
            when (ch) {
                eof -> return resetBuilder()
                '"' -> if (peek() == '"') {
                    // Doubled quote: consume the second one as well, otherwise it would be
                    // mistaken for the closing quote on the next iteration.
                    read()
                    sb.append('"')
                } else {
                    return resetBuilder()
                }
                else -> sb.append(ch)
            }
        }
    }

    /** Reads characters up to, but not including, the next tab, line break or end of input. */
    private fun readUntilTab(): String {
        while (true) {
            when (peek()) {
                '\t', '\r', '\n', eof -> return resetBuilder()
                else -> sb.append(read())
            }
        }
    }

    /** Returns the buffered text and empties the buffer for the next cell. */
    private fun resetBuilder(): String {
        val s = sb.toString()
        sb.setLength(0)
        return s
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment