Skip to content

Instantly share code, notes, and snippets.

@hohonuuli
Created October 19, 2022 03:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hohonuuli/e801c07d2c72527627792a13fc470a7c to your computer and use it in GitHub Desktop.
Save hohonuuli/e801c07d2c72527627792a13fc470a7c to your computer and use it in GitHub Desktop.
Scala script to convert Pascal VOC annotations to CSV, written for a Medium article.
#!/usr/bin/env -S scala-cli shebang --scala-version 3.2.0
//> using lib "org.scala-lang.modules::scala-xml:2.1.0"
import scala.xml.Elem
import java.io.File
import scala.xml.XML
import java.nio.file.Paths
import scala.util.Using
import java.nio.file.Files
/** One Pascal VOC annotation document.
  *
  * @param filename text of the annotation's `filename` element
  * @param objects  the annotated objects found in the document; empty by default
  */
case class Voc(filename: String, objects: Seq[VocObject] = Nil)
/** A labelled bounding box taken from a VOC `object` element.
  *
  * @param name label of the object
  * @param xmin left edge of the box
  * @param ymin top edge of the box
  * @param xmax right edge of the box
  * @param ymax bottom edge of the box
  */
case class VocObject(name: String, xmin: Int, ymin: Int, xmax: Int, ymax: Int):

  /** Horizontal extent of the box: `xmax - xmin`. */
  val width: Int = xmax - xmin

  /** Vertical extent of the box: `ymax - ymin`. */
  val height: Int = ymax - ymin
/** Parses Pascal VOC annotation XML into [[Voc]] values. */
object VocParser:

  /** Converts one VOC annotation document into a [[Voc]].
    *
    * Reads the root's `filename` child and every `object` child. For each object the `name`
    * text and the four `bndbox` coordinates are extracted; each coordinate is shifted by -1.
    *
    * @param xml root element of the annotation document
    * @return the file name plus one [[VocObject]] per `object` element
    * @throws NumberFormatException if a bounding-box coordinate is missing or not an integer
    */
  def parse(xml: Elem): Voc =
    val filename = (xml \ "filename").text
    val vocObjects = (xml \ "object").map { n =>
      val name = (n \ "name").text
      val box = n \ "bndbox"
      // VOC is 1-based index. Convert to 0 based.
      // See https://cv.gluon.ai/_modules/gluoncv/data/pascal_voc/detection.html#VOCDetection
      VocObject(
        name,
        xmin = coordinate(box, "xmin") - 1,
        ymin = coordinate(box, "ymin") - 1,
        xmax = coordinate(box, "xmax") - 1,
        ymax = coordinate(box, "ymax") - 1
      )
    }
    Voc(filename, objects = vocObjects)

  /** Reads the integer text of the child element `tag` of `box`.
    *
    * The text is trimmed first so that pretty-printed XML (whitespace or newlines around the
    * number) still parses. Failures keep the `NumberFormatException` type but name the
    * offending tag and value.
    */
  private def coordinate(box: scala.xml.NodeSeq, tag: String): Int =
    val raw = (box \ tag).text.trim
    raw.toIntOption.getOrElse(
      throw new NumberFormatException(s"Invalid VOC bndbox <$tag> value: '$raw'")
    )
// Script entry point. Expects exactly one argument: the directory that holds the
// Pascal VOC XML annotation files.
if (args.length != 1) {
  // Usage goes to stderr so it never ends up in redirected CSV output.
  System.err.println("Usage: voc_to_csv.sc <path to VOC dir>")
  System.exit(1)
}

val dir = Paths.get(args(0))

// Files.list keeps a directory handle open, so the stream is closed through Using.
// Prints one CSV row per annotated object: filename,name,xmin,ymin,width,height
Using(Files.list(dir)) { stream =>
  stream
    .filter(_.toString.endsWith(".xml"))
    .map(path => XML.loadFile(path.toFile()))
    .map(VocParser.parse)
    .forEach { voc =>
      voc.objects.foreach { obj =>
        println(s"${voc.filename},${obj.name},${obj.xmin},${obj.ymin},${obj.width},${obj.height}")
      }
    }
}.failed.foreach { err =>
  // Using(...) captures exceptions in a Try; without this check an unreadable directory,
  // malformed XML, or bad coordinate would be dropped silently and the script would exit 0.
  System.err.println(s"Error converting VOC annotations in $dir: $err")
  System.exit(1)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment