Last active
November 3, 2018 20:04
-
-
Save jeiea/9185e2676673c268d68994108ba52e47 to your computer and use it in GitHub Desktop.
For those for whom file.readText().lines().map { it.split('\t') } is not sufficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.Reader | |
/**
 * Portable TSV (tab-separated values) reader that supports quoted, multiline cells.
 *
 * Usage: `File("a.tsv").bufferedReader().use { TsvReader(it).readAll() }`
 *
 * Format rules implemented by this class:
 * - Cells are separated by a tab; rows are separated by `\r\n`, `\r` or `\n`.
 * - A cell that starts with `"` is quoted: it runs until the closing `"` and may
 *   contain tabs and line breaks. Inside a quoted cell, `""` stands for one `"`.
 * - Characters that follow the closing quote, up to the next delimiter, are
 *   appended to the cell verbatim instead of being rejected.
 * - An unterminated quoted cell extends to the end of the input.
 * - Empty cells are preserved, so `a\t\tb` yields `["a", "", "b"]`.
 * - A blank line yields an empty row; a line break at the very end of the input
 *   does not produce an additional row.
 *
 * The constructor reads one character of lookahead from [reader] immediately.
 * The reader is never closed by this class; the caller owns it.
 *
 * @param reader character source to parse; wrap it in a buffered reader for speed,
 * because characters are pulled one at a time.
 */
class TsvReader(private val reader: Reader) {
    /** Accumulates the characters of the cell currently being parsed. */
    private val sb = StringBuilder()

    /**
     * One-character lookahead, kept as the raw `Int` returned by [Reader.read] so that
     * end of input (-1) can never be confused with a real character such as U+FFFF.
     */
    private var lookahead: Int = reader.read()

    /** Returns the next character code, or [EOF], without consuming it. */
    private fun peek(): Int = lookahead

    /** Consumes and returns the next character code, or [EOF] once the input is exhausted. */
    private fun read(): Int {
        val current = lookahead
        // Do not touch the underlying reader again after it has reported end of input.
        if (current != EOF) {
            lookahead = reader.read()
        }
        return current
    }

    /**
     * Parses all remaining input.
     *
     * @return the parsed rows in input order, each row being the list of its cells.
     */
    fun readAll(): List<List<String>> {
        val rows = mutableListOf<List<String>>()
        while (peek() != EOF) {
            rows.add(readRow())
        }
        return rows
    }

    /** Reads one row, including its terminating line break if there is one. */
    private fun readRow(): List<String> {
        val row = mutableListOf<String>()
        // A line break with nothing before it is a blank line, reported as an empty row.
        if (consumeLineBreak()) {
            return row
        }
        while (true) {
            row.add(readCell())
            // readCell() always stops on a tab, a line break, or end of input.
            if (peek() == TAB) {
                // A tab always announces another cell, even if that cell turns out empty.
                read()
            } else {
                consumeLineBreak()
                return row
            }
        }
    }

    /**
     * Reads one cell and leaves the delimiter that ends it unconsumed.
     * Returns an empty string when the cell is empty.
     */
    private fun readCell(): String {
        if (peek() == QUOTE) {
            read() // opening quote
            readQuoted()
        }
        // Either the whole unquoted cell, or stray text following a closing quote.
        readUnquoted()
        return resetBuilder()
    }

    /** Appends the content of a quoted cell to [sb]; the opening quote is already consumed. */
    private fun readQuoted() {
        while (true) {
            val ch = read()
            when (ch) {
                EOF -> return // unterminated quote: keep what has been read so far
                QUOTE -> {
                    if (peek() != QUOTE) {
                        return // closing quote
                    }
                    // Doubled quote: consume the second one and emit a single literal quote.
                    read()
                    sb.append('"')
                }
                else -> sb.append(ch.toChar())
            }
        }
    }

    /** Appends characters to [sb] until a tab, a line break, or end of input is next. */
    private fun readUnquoted() {
        while (true) {
            when (peek()) {
                TAB, CR, LF, EOF -> return
                else -> sb.append(read().toChar())
            }
        }
    }

    /**
     * Consumes a single line break (`\r\n`, `\r` or `\n`) if one is next.
     *
     * @return `true` when a line break was consumed.
     */
    private fun consumeLineBreak(): Boolean {
        val ch = peek()
        if (ch != CR && ch != LF) {
            return false
        }
        read()
        // Treat "\r\n" as one line break.
        if (ch == CR && peek() == LF) {
            read()
        }
        return true
    }

    /** Returns the accumulated cell text and clears [sb] for the next cell. */
    private fun resetBuilder(): String {
        val text = sb.toString()
        sb.setLength(0)
        return text
    }

    private companion object {
        /** Value returned by [Reader.read] at end of input. */
        const val EOF = -1

        /** Code of the tab character `'\t'`. */
        const val TAB = 0x09

        /** Code of the line feed character `'\n'`. */
        const val LF = 0x0A

        /** Code of the carriage return character `'\r'`. */
        const val CR = 0x0D

        /** Code of the double quote character `'"'`. */
        const val QUOTE = 0x22
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment