Created
May 1, 2023 11:56
-
-
Save Radiokot/b7bdb3de38b51629dee5ba0b023690c9 to your computer and use it in GitHub Desktop.
Read and parse a Kindle "My Clippings.txt" file in reverse order, using Kotlin and the Apache Commons IO ReversedLinesFileReader.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.commons.io.input.ReversedLinesFileReader | |
import org.junit.Test | |
import java.io.File | |
class KindleMyClippings { | |
/**
 * A single parsed clipping.
 *
 * @property bookDetails the book line of the source block, unchanged.
 * @property date the text that follows the last `|` separator of the block's
 * clipping details line (see [fromBlock]).
 * @property content the clipped text of the source block, unchanged.
 */
class Clipping(
    val bookDetails: String,
    val date: String,
    val content: String,
) {
    /** Renders every property on its own tab-indented line. */
    override fun toString(): String {
        return "Clipping(" +
            "\n\tbookDetails='$bookDetails'," +
            "\n\tdate='$date'," +
            "\n\tcontent='$content'" +
            "\n)"
    }

    companion object {
        /**
         * Matches a whole details line and captures, as group 1, everything
         * after the last `|` that is followed by a whitespace character.
         *
         * A single greedy `.+` in front of the separator accepts the same
         * lines and yields the same capture as a repeated `(?:.+\|)+` group,
         * but avoids nested quantifiers, which are prone to catastrophic
         * backtracking on lines that contain many `|` yet do not match.
         */
        private val DATE_REGEX = Regex(""".+\|\s(.+)""")

        /**
         * Builds a [Clipping] from a raw [block].
         *
         * @param block the block whose clipping details line carries the date.
         * @return a clipping with the block's book details and content, and
         * the date text extracted from its clipping details.
         * @throws IllegalStateException if the clipping details line has no
         * `|` separator followed by whitespace and at least one character.
         */
        fun fromBlock(block: ClippingBlock): Clipping {
            val dateMatch = checkNotNull(DATE_REGEX.matchEntire(block.clippingDetails)) {
                "Can't extract date from the clipping details"
            }

            return Clipping(
                bookDetails = block.bookDetails,
                date = dateMatch.groupValues[1],
                content = block.clippingContent,
            )
        }
    }
}
/**
 * The raw lines of one clipping entry, before any further parsing.
 *
 * @property bookDetails the book line of the entry.
 * @property clippingDetails the details line of the entry.
 * @property clippingContent the content line of the entry.
 */
class ClippingBlock(
    val bookDetails: String,
    val clippingDetails: String,
    val clippingContent: String,
) {
    /** Renders every property on its own tab-indented line. */
    override fun toString(): String {
        return "ClippingBlock(\n\tbookDetails='$bookDetails'," +
            "\n\tclippingDetails='$clippingDetails'," +
            "\n\tclippingContent='$clippingContent'" +
            "\n)"
    }

    companion object {
        /** Number of lines that make up one block. */
        const val LINES_COUNT = 4

        /** U+FEFF, the character a decoded UTF-8 byte-order mark turns into. */
        private const val BYTE_ORDER_MARK = "\uFEFF"

        /**
         * Builds a block from its lines given in reverse file order:
         * content first, book details last.
         *
         * The line at index 1 is not used (presumably the blank separator
         * between details and content; verify against the file format).
         *
         * A byte-order mark at the very start of a UTF-8 file is kept by the
         * decoder as a leading U+FEFF on the first line, so it would end up
         * in the book details of the first entry; it is stripped here.
         *
         * @param lines exactly [LINES_COUNT] lines in reverse order.
         * @return the block assembled from the given lines.
         * @throws IllegalArgumentException if [lines] does not contain
         * exactly [LINES_COUNT] elements.
         */
        fun fromLinesReversed(lines: List<String>): ClippingBlock {
            require(lines.size == LINES_COUNT) {
                "A block must contain $LINES_COUNT lines"
            }

            val (content, _, details, book) = lines

            return ClippingBlock(
                bookDetails = book.removePrefix(BYTE_ORDER_MARK),
                clippingDetails = details,
                clippingContent = content,
            )
        }
    }
}
/** Line that [readNextBlock] looks for before reading the lines of a block. */
private val DIVIDER = "=========="

/**
 * Reads the clippings file from its end towards its start, converts every
 * block into a [Clipping] and prints the collected clippings.
 *
 * A block that cannot be converted is printed together with the stack trace
 * of the failure and is left out of the result.
 */
@Test
fun parseClippings() {
    val clippingsFile = File("C:\\Users\\spiri\\Desktop\\My Clippings.txt")
    val parsed = mutableListOf<Clipping>()

    ReversedLinesFileReader(clippingsFile, Charsets.UTF_8).use { reader ->
        // The sequence ends as soon as readNextBlock returns null.
        generateSequence { readNextBlock(reader) }.forEach { block ->
            try {
                parsed += Clipping.fromBlock(block)
            } catch (e: Exception) {
                println(block)
                e.printStackTrace()
            }
        }
    }

    parsed.forEach(::println)
}
/**
 * Reads the next clipping block from [reader].
 *
 * Lines are consumed until one equal to [DIVIDER] is found; the
 * [ClippingBlock.LINES_COUNT] lines read after it form the block.
 *
 * @param reader the reversed-lines reader positioned somewhere in the file.
 * @return the block, or `null` if the reader runs out of lines before a
 * divider is found or yields fewer than [ClippingBlock.LINES_COUNT] lines
 * after it.
 */
private fun readNextBlock(reader: ReversedLinesFileReader): ClippingBlock? {
    // Skip to the closest divider; stop altogether once the lines run out.
    while (true) {
        val line = reader.readLine() ?: return null
        if (line == DIVIDER) {
            break
        }
    }

    // An incomplete trailing read yields no block.
    return reader.readLines(ClippingBlock.LINES_COUNT)
        .takeIf { it.size == ClippingBlock.LINES_COUNT }
        ?.let { ClippingBlock.fromLinesReversed(it) }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment