Skip to content

Instantly share code, notes, and snippets.

@hohonuuli
Last active May 26, 2022 01:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hohonuuli/cd2204c5900b8c7e95559c35fdd3c0e7 to your computer and use it in GitHub Desktop.
Save hohonuuli/cd2204c5900b8c7e95559c35fdd3c0e7 to your computer and use it in GitHub Desktop.
Proof-of-concept to convert https://marine-imaging.com/fair/ifdos/iFDO-overview/ to FathomNet-friendly CSV
#!/usr/bin/env -S scala-cli shebang --scala-version 3.1.2
/*
Proof of concept to convert iFDO YAML to FathomNet CSV
Requirements:
- Install scala-cli https://scala-cli.virtuslab.org
Usage:
ifdo-to-fathomnet.sc <ifdo-yaml-file> > <fathomnet-csv-file>
*/
//> using lib "io.circe::circe-core:0.14.1"
//> using lib "io.circe::circe-parser:0.14.1"
//> using lib "io.circe::circe-yaml:0.14.1"
//> using lib "io.circe::circe-generic:0.14.1"
import _root_.io.circe.yaml.parser
import _root_.io.circe.*
import _root_.io.circe.parser.*
import _root_.io.circe.syntax.*
import _root_.io.circe.generic.semiauto._
import scala.deriving.*
import scala.compiletime.{summonAll}
/**
* --- Transforms to convert Scala case classes to CSV
*/
/** Renders `a` as CSV text using the [[Transformer]] instance in scope for `A`. */
def transform[A: Transformer](a: A): String =
  val instance = summon[Transformer[A]]
  instance.f(a)
/**
 * Type class that renders a value of type `T` as CSV text.
 *
 * The instances below cover single scalar fields; each returns the text of
 * exactly one CSV field (no separators are added here).
 */
trait Transformer[T]:
  /** Returns the CSV text for `t`. */
  def f(t: T): String

/**
 * Strings are emitted verbatim unless they contain a comma, a double quote or a
 * line break. In that case the field is wrapped in double quotes and embedded
 * quotes are doubled (RFC 4180 style), so the value cannot split a row or column.
 */
given Transformer[String] with
  def f(x: String): String =
    val needsQuoting = x.exists(c => c == ',' || c == '"' || c == '\n' || c == '\r')
    if needsQuoting then "\"" + x.replace("\"", "\"\"") + "\"" else x

/** Integers are written in plain decimal form. */
given Transformer[Int] with
  def f(x: Int): String = x.toString

/**
 * Doubles are written with five decimal places.
 *
 * Locale.ROOT pins the decimal separator to '.'. Formatting with the JVM default
 * locale (what the `f` interpolator does) yields "1,50000" on comma-decimal
 * locales, which would corrupt the comma-separated output.
 */
given Transformer[Double] with
  def f(x: Double): String = "%.5f".formatLocal(java.util.Locale.ROOT, x)

/** Booleans are written as "true" / "false". */
given Transformer[Boolean] with
  def f(x: Boolean): String = x.toString
/**
 * Lifts a transformer for `T` to `Option[T]`: an absent value becomes an empty
 * CSV field, a present value is rendered by the underlying transformer.
 */
given [T](using t: Transformer[T]): Transformer[Option[T]] =
  new Transformer[Option[T]]:
    def f(x: Option[T]) = x.fold("")(t.f)
/**
 * Renders a list of products as a CSV table: one header row built from the
 * field names of the first element, followed by one row per element, joined
 * with newlines.
 *
 * An empty list renders as an empty string. The header is derived from an
 * element instance, so there is nothing to derive it from when the list is
 * empty (the previous `x.head` threw NoSuchElementException in that case).
 */
given [A <: Product](using t: Transformer[A]): Transformer[List[A]] =
  new Transformer[List[A]]:
    def f(x: List[A]) = x.headOption match
      case None => ""
      case Some(first) =>
        val rows = asHeader(first) :: x.map(t.f)
        rows.mkString("\n")
/**
 * Derives a transformer for any product type (case class) that has a Mirror:
 * every field is rendered with the Transformer for its type and the results
 * are joined with commas into a single CSV row.
 */
inline given [A <: Product](using m: Mirror.ProductOf[A]): Transformer[A] =
  new Transformer[A]:
    // Tuple of Transformer types, one per field of A, in declaration order.
    type FieldTransformers = Tuple.Map[m.MirroredElemTypes, Transformer]

    // summonAll resolves one instance per field at compile time. The cast erases
    // the element types so the instances can be applied to the untyped values
    // produced by productIterator.
    val fieldTransformers =
      summonAll[FieldTransformers].toList.asInstanceOf[List[Transformer[Any]]]

    def f(a: A): String =
      a.productIterator
        .zip(fieldTransformers)
        .map((value, fieldTransformer) => fieldTransformer.f(value))
        .mkString(",")
/** Builds a CSV header row from the field names of `a`, in declaration order. */
def asHeader[A <: Product](a: A): String =
  val fieldNames = a.productElementNames
  fieldNames.mkString(",")
/**
* --- Data model
*/
/**
 * One row of the generated CSV.
 *
 * The field names become the CSV header and the field order becomes the column
 * order, so neither may be changed without changing the output format.
 * Optional fields that are `None` are written as empty CSV fields.
 *
 * @param image    image identifier (this script passes the key of the image entry)
 * @param concept  name of the annotated concept
 * @param x        minimum x coordinate of the box
 * @param y        minimum y coordinate of the box
 * @param width    horizontal extent of the box (max x - min x)
 * @param height   vertical extent of the box (max y - min y)
 * @param observer name of the annotator; the only optional field this script fills in
 */
case class BoundingBox(
  // Required columns.
  image: String,
  concept: String,
  x: Int,
  y: Int,
  width: Int,
  height: Int,
  // Optional columns; left empty unless explicitly supplied.
  altitude: Option[Double] = None,
  altconcept: Option[String] = None,
  depth: Option[Double] = None,
  groupof: Option[Boolean] = None,
  imagingtype: Option[String] = None,
  latitude: Option[Double] = None,
  longitude: Option[Double] = None,
  observer: Option[String] = None,
  occluded: Option[Boolean] = None,
  oxygen: Option[Double] = None,
  pressure: Option[Double] = None,
  salinity: Option[Double] = None,
  temperature: Option[Double] = None,
  timestamp: Option[String] = None
)
/** An entry of `image-annotation-creators` in the iFDO header: an annotator id and display name. */
case class ImageAnnotationCreator(
  id: String,
  name: String
)

/** An entry of `image-annotation-labels` in the iFDO header: a label id, its info text/URL and its name. */
case class ImageAnnotationLabel(
  id: String,
  info: String,
  name: String
)

/** A label attached to an annotation; in the iFDO file both fields hold ids that refer to header entries. */
case class Label(
  annotator: String,
  label: String
)

/** An entry of an image's `image-annotations`: flat coordinate list, shape name and attached labels. */
case class ImageAnnotation(
  coordinates: Seq[Int],
  shape: String,
  labels: Seq[Label]
)
/**
 * An annotation together with the key of the image it belongs to.
 *
 * @param url         key of the image entry the annotation was found under
 * @param coordinates flat list of corner coordinates laid out as x1,y1,x2,y2,x3,y3,x4,y4
 * @param labels      labels attached to the annotation
 */
case class Annotation(url: String, coordinates: Seq[Int], labels: Seq[Label]):

  /**
   * Converts the four corner points into an axis-aligned box (minimum x/y plus
   * width/height). Only the first label is used: its `label` becomes the
   * concept and its `annotator` becomes the observer.
   *
   * Throws if fewer than eight coordinates or no labels are present.
   */
  def toBoundingBox: BoundingBox =
    // Even indices hold x values, odd indices hold y values.
    val xs = Seq(0, 2, 4, 6).map(coordinates)
    val ys = Seq(1, 3, 5, 7).map(coordinates)
    val (minX, minY) = (xs.min, ys.min)
    val boxWidth = xs.max - minX
    val boxHeight = ys.max - minY
    val firstLabel = labels.head // just grabbing the first label for now
    BoundingBox(
      image = url,
      concept = firstLabel.label,
      x = minX,
      y = minY,
      width = boxWidth,
      height = boxHeight,
      observer = Some(firstLabel.annotator)
    )
/**
* --- Circe decoders to simplify JSON/YAML parsing
*/
// Decoders are derived from the case-class field names, so the YAML keys must
// match those names exactly.

// Labels are nested inside annotations, so ImageAnnotation's decoder relies on this one.
given Decoder[Label] = deriveDecoder[Label]
given Decoder[ImageAnnotation] = deriveDecoder[ImageAnnotation]

// Header entries.
given Decoder[ImageAnnotationCreator] = deriveDecoder[ImageAnnotationCreator]
given Decoder[ImageAnnotationLabel] = deriveDecoder[ImageAnnotationLabel]
/**
* --- Methods to extract the data we're interested in from the YAML
*/
/**
 * Reads the annotator list at `image-set-header` / `image-annotation-creators`.
 *
 * @param json the parsed iFDO document
 * @return the decoded creators, or an empty list when the header or the field
 *         is missing or cannot be decoded
 */
def extractImageAnnotationCreators(json: Json): Iterable[ImageAnnotationCreator] =
  json.hcursor
    .downField("image-set-header")
    .downField("image-annotation-creators")
    // Decoding straight from the cursor turns a missing path into a Left
    // instead of throwing, as `.focus.get` did.
    .as[List[ImageAnnotationCreator]]
    .getOrElse(Nil)
/**
 * Reads the label list at `image-set-header` / `image-annotation-labels`.
 *
 * @param json the parsed iFDO document
 * @return the decoded labels, or an empty list when the header or the field
 *         is missing or cannot be decoded
 */
def extractImageAnnotationLabels(json: Json): Iterable[ImageAnnotationLabel] =
  json.hcursor
    .downField("image-set-header")
    .downField("image-annotation-labels")
    // Decoding straight from the cursor turns a missing path into a Left
    // instead of throwing, as `.focus.get` did.
    .as[List[ImageAnnotationLabel]]
    .getOrElse(Nil)
/**
 * Collects every rectangle annotation found under `image-set-items`.
 *
 * Each key of `image-set-items` is treated as an image identifier and its
 * `image-annotations` list is decoded. Only annotations whose shape is
 * "rectangle" are kept.
 *
 * Images without an `image-annotations` field, and documents without
 * `image-set-items`, contribute nothing (previously both cases threw).
 * If an image's annotation list cannot be decoded, the whole list for that
 * image is skipped.
 *
 * @param json the parsed iFDO document
 * @return one [[Annotation]] per rectangle annotation, tagged with its image key
 */
def extractImageAnnotations(json: Json): Iterable[Annotation] =
  val items = json.hcursor.downField("image-set-items")
  // `keys` is None when the field is absent or is not an object.
  val imageKeys = items.keys.toList.flatten
  for
    url <- imageKeys
    annotation <- items
      .downField(url)
      .downField("image-annotations")
      .as[List[ImageAnnotation]]
      .getOrElse(Nil)
    if annotation.shape == "rectangle"
  yield Annotation(url, annotation.coordinates, annotation.labels)
/**
* --- iFDO annotations reference labels and annotators by ID. Resolve those IDs to the names declared in the header.
*/
/**
 * Replaces the label and annotator ids on each annotation with the names
 * declared in the header and converts the result to a bounding box.
 *
 * Only the first label of each annotation is used. Annotations without any
 * label are skipped. An id with no matching header entry is kept as-is, so
 * the row is still emitted instead of aborting the whole conversion.
 *
 * @param labels      label definitions from the header
 * @param creators    annotator definitions from the header
 * @param annotations annotations to resolve
 * @return one bounding box per annotation that has at least one label
 */
def resolve(labels: Seq[ImageAnnotationLabel],
            creators: Seq[ImageAnnotationCreator],
            annotations: Seq[Annotation]): Seq[BoundingBox] =
  // Index the header once. Reversing first makes the earliest entry win when
  // an id is duplicated, which matches what a linear `find` would return.
  val creatorNameById = creators.reverseIterator.map(c => c.id -> c.name).toMap
  val labelNameById = labels.reverseIterator.map(l => l.id -> l.name).toMap
  for
    a <- annotations
    first <- a.labels.headOption.toSeq
  yield
    val resolved = Label(
      annotator = creatorNameById.getOrElse(first.annotator, first.annotator),
      label = labelNameById.getOrElse(first.label, first.label)
    )
    a.copy(labels = Seq(resolved)).toBoundingBox
/**
* --- Main method. Converts a YAML string to a CSV string
*/
/**
 * Converts the text of an iFDO YAML document to CSV text.
 *
 * @param yaml the YAML document as a string
 * @return the CSV produced from the document's resolved annotations, or the
 *         literal text "Parsing failed" when the input is not valid YAML
 */
def yamlToCsv(yaml: String): String =
  parser
    .parse(yaml)
    .map { doc =>
      val annotations = extractImageAnnotations(doc).toSeq
      val creators = extractImageAnnotationCreators(doc).toSeq
      val labels = extractImageAnnotationLabels(doc).toSeq
      val boxes = resolve(labels, creators, annotations).toList
      transform(boxes)
    }
    .getOrElse("Parsing failed")
// Script entry point: read the YAML file named by the first argument and
// print the resulting CSV to stdout.
if args.isEmpty then
  // Without this check a missing argument fails with a bare index error.
  System.err.println("Usage: ifdo-to-fathomnet.sc <ifdo-yaml-file> > <fathomnet-csv-file>")
  sys.exit(1)

// Using.resource closes the file handle even if reading fails. The encoding is
// fixed to UTF-8 so the result does not depend on the platform default charset.
val yaml = scala.util.Using.resource(scala.io.Source.fromFile(args(0), "UTF-8"))(_.mkString)
val csv = yamlToCsv(yaml)
println(csv)
@hohonuuli
Copy link
Author

Input:

# General information about the dataset.
image-set-header:
    # UUID, name, handle are mandatory fields of each iFDO.
    image-set-uuid: 2a2360e9-a5ec-4ad2-be04-0ea0b4cbdc58
    image-set-name: SO268-1_21-1_GMR_CAM-23
    # Handles are a superset of DOIs and can be obtained with a Handle server.
    # See: http://handle.net/
    image-set-handle: 20.500.12085/2a2360e9-a5ec-4ad2-be04-0ea0b4cbdc58@data
    # Version must be specified but defaults to v1.0.0 if not present.
    image-set-ifdo-version: v1.0.0
    # List of labels (not annotations) that are used in this dataset.
    image-annotation-labels:
        # Label IDs should be universally unique if possible. It must be unique in this file.
        # The info field can be used for a URL, too.
        - id: urn:lsid:marinespecies.org:taxname:124731
          name: Kolga hyalina
          info: http://www.marinespecies.org/aphia.php?p=taxdetails&id=124731
    # List of persons who created annotations in this dataset.
    image-annotation-creators:
        # ORCID ID can be used as annotator ID, if available.
        - id: 0000-0002-7122-2343
          name: Martin Zurowietz
# List of images of this dataset (list keys are image filenames).
image-set-items:
    SO268-1_21-1_GMR_CAM-23_20190513_131416.jpg:
        # List of annotations on this image.
        image-annotations:
            # Bounding box annotation: x1,y1,x2,y2,x3,y3,x4,y4
            - coordinates: [10,10,10,20,20,20,20,10]
              shape: rectangle
              # An annotation can have one or more labels. Label and annotator are referenced
              # by their ID.
              labels:
                  - label: urn:lsid:marinespecies.org:taxname:124731
                    annotator: 0000-0002-7122-2343

Output:

image,concept,x,y,width,height,altitude,altconcept,depth,groupof,imagingtype,latitude,longitude,observer,occluded,oxygen,pressure,salinity,temperature,timestamp
SO268-1_21-1_GMR_CAM-23_20190513_131416.jpg,Kolga hyalina,10,10,10,10,,,,,,,,Martin Zurowietz,,,,,,

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment