# eddyxu/hardhat-rikai.py

## hardhat-rikai.py
import xml.etree.ElementTree as ET
from pyspark.sql import Row
from rikai.types import Box2d, Image

# Convert the hard-hat detection dataset under ``basedir`` into a Rikai
# dataset: every annotation XML file in the "Train" and "Test" folders becomes
# one record holding the image, its size, its split and its labelled boxes.
# ``basedir`` and ``spark`` are expected to be defined before this runs.


def _int_text(node, path):
    """Return the text of the element found at ``path`` under ``node`` as an int."""
    return int(node.find(path).text)


def _to_annotation(obj):
    """Build one ``Row(label, box)`` from a single ``<object>`` element."""
    label = obj.find("name").text
    corners = {}
    for key in ("xmin", "ymin", "xmax", "ymax"):
        corners[key] = _int_text(obj, f"./bndbox/{key}")
    return Row(label=label, box=Box2d(**corners))


# Index every JPEG found anywhere below ``basedir`` by its bare file name, so
# the <filename> element of an annotation file can be resolved to a path.
all_images = dict((jpg.name, jpg) for jpg in basedir.glob("**/*.jpg"))

images = []
for split in ("Train", "Test"):
    xml_pattern = f"{split}/**/*.xml"
    for voc_file in basedir.glob(xml_pattern):
        root = ET.parse(voc_file).getroot()
        annotations = list(map(_to_annotation, root.iter("object")))
        filename = root.find("filename").text
        width = _int_text(root, "./size/width")
        height = _int_text(root, "./size/height")
        depth = _int_text(root, "./size/depth")
        record = {
            "filename": filename,
            "width": width,
            "height": height,
            "depth": depth,
            "annotations": annotations,
            # The folder name doubles as the split label, lower-cased.
            "split": split.lower(),
            # Raises KeyError when no JPEG with this name was indexed above.
            "image": Image(all_images[filename]).to_embedded(),
        }
        images.append(record)

# Materialise all records as a Spark DataFrame and persist it in Rikai format.
frame = spark.createDataFrame(images)
frame.write.format("rikai").save("datasets/hardhat")
	# NOTE(review): this section repeats the script above; it looks like a whitespace-mangled copy of it.
# Its indentation and recursive glob patterns ("**/*.jpg", "**/*.xml") have
# been restored from the intact copy. Running both copies in one file will
# attempt to save to "datasets/hardhat" twice -- confirm whether this second
# copy is still wanted.
import xml.etree.ElementTree as ET
from pyspark.sql import Row
from rikai.types import Box2d, Image

# Index every JPEG found anywhere below ``basedir`` by its bare file name so
# that the <filename> element of an annotation file can be mapped to a path.
# ``basedir`` (and ``spark`` below) must already be defined by the caller.
all_images = {p.name: p for p in basedir.glob("**/*.jpg")}
images = []
for split in ["Train", "Test"]:
    # Each annotation XML file below the split folder describes one image.
    for voc_file in basedir.glob(f"{split}/**/*.xml"):
        root = ET.parse(voc_file).getroot()
        # One Row per <object>: its class name plus an integer bounding box.
        annotations = [
            Row(
                label=obj.find("name").text,
                box=Box2d(
                    **{
                        k: int(obj.find(f"./bndbox/{k}").text)
                        for k in ["xmin", "ymin", "xmax", "ymax"]
                    }
                ),
            )
            for obj in root.iter("object")
        ]
        filename = root.find("filename").text
        images.append(
            {
                "filename": filename,
                "width": int(root.find("./size/width").text),
                "height": int(root.find("./size/height").text),
                "depth": int(root.find("./size/depth").text),
                "annotations": annotations,
                # The folder name doubles as the split label, lower-cased.
                "split": split.lower(),
                # Raises KeyError when no JPEG with this name was indexed.
                "image": Image(all_images[filename]).to_embedded(),
            }
        )
# Materialise all records as a Spark DataFrame and persist it in Rikai format.
spark.createDataFrame(images).write.format("rikai").save("datasets/hardhat")