This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# Generate HD-Vila-100M dataset | |
# | |
# https://github.com/microsoft/XPretrain/tree/main/hd-vila-100m | |
import argparse | |
import datetime | |
import logging |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Return every row of `hardhat`, plus the predictions of the `coco_det` and
-- `hardhat` models on its image, where the hardhat predictions include both a
-- 'helmet' and a 'head' label and the coco_det predictions include a 'truck'.
SELECT * FROM (
    SELECT
        *,
        ML_PREDICT(coco_det, image) AS det,
        ML_PREDICT(hardhat, image) AS hat
    FROM hardhat
)
-- NOTE(review): the label was spelled "helmat" in the original; corrected to
-- 'helmet' on the assumption that it was a typo -- confirm against the label
-- set the hardhat model actually emits.
WHERE array_contains(hat.label, 'helmet')
  AND array_contains(hat.label, 'head')
  AND array_contains(det.label, 'truck')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Load from model registry.
-- Defines the model `hardhat` with the pytorch flavor and the `ssd` model
-- type, using the weights file at the given S3 URI.
CREATE MODEL hardhat
FLAVOR pytorch
MODEL_TYPE ssd
USING 's3://bucket/to/hardhat.pth';
-- Load pretrained models.
-- Defines the model `coco_det` with the pytorch flavor and the `fasterrcnn`
-- model type; no USING clause is given, so no explicit weights URI is supplied.
CREATE MODEL coco_det
FLAVOR pytorch
MODEL_TYPE fasterrcnn;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rikai.pytorch.data import Dataset | |
def train( | |
name: str, | |
uri: Dataset, | |
model_type: str, | |
epochs: int = 150, | |
batch_size: int = 4, | |
num_workers: int = 4, | |
lr: float = 0.02, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.ElementTree as ET | |
from pyspark.sql import Row | |
from rikai.types import Box2d, Image | |
all_images = {p.name: p for p in basedir.glob("**/*.jpg")} | |
images = [] | |
for split in ["Train", "Test"]: | |
for voc_file in basedir.glob(f"{split}/**/*.xml"): | |
root = ET.parse(voc_file).getroot() | |
annotations = [Row( |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- One output row per detection produced by the `ssd` model on `raw_data`,
-- keeping only detections that satisfy all of:
--   * box area strictly between 100 and 10000,
--   * COCO class name is 'chair', 'remote' or 'cell phone',
--   * score below 0.6.
SELECT image_id, image, detection.* FROM (
    SELECT
        image_id,
        image,
        -- explode() turns the array returned by ML_PREDICT into one row per element.
        explode(ML_PREDICT(ssd, image)) AS detection
    FROM raw_data
)
WHERE 100 < area(detection.box) AND area(detection.box) < 10000
  AND coco_name(detection.label_id) IN ('chair', 'remote', 'cell phone')
  AND detection.score < 0.6
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Histogram of detection box areas from `least_margin`, restricted to rows
-- whose first two label ids both map to 'chair' or 'person'.
SELECT box_area, count(*) AS cnt FROM (
    SELECT
        -- Divide by 500, cast to int, multiply back: rounds the area to a
        -- multiple of 500 so that areas fall into 500-wide buckets.
        CAST(area(detection.box) / 500 AS int) * 500 AS box_area
    FROM least_margin
    WHERE coco_name(detection.label_ids[0]) IN ('chair', 'person')
      AND coco_name(detection.label_ids[1]) IN ('chair', 'person')
)
GROUP BY box_area
ORDER BY box_area
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Count the detections produced by the `ssd` model on `coco` for six selected
-- COCO class names, most frequent label first.
SELECT
    coco_name(detection.label_id) AS label,
    count(detection.label_id) AS cnt
FROM (
    SELECT
        -- One row per element of the array returned by ML_PREDICT.
        explode(ML_PREDICT(ssd, image)) AS detection
    FROM coco
)
WHERE coco_name(detection.label_id) IN (
    'person', 'chair', 'remote', 'cell phone', 'sheep', 'cow'
)
GROUP BY detection.label_id
ORDER BY cnt DESC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH least_margin AS ( | |
SELECT | |
image_id, image, detection, | |
detection.scores[0] - detection.scores[1] as margin FROM ( | |
SELECT | |
image_id, | |
image, | |
explode(ML_PREDICT(class_scores, image)) AS detection | |
FROM coco | |
) ORDER BY margin LIMIT 1000 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
image_id, | |
image, | |
detection, | |
entropy(detection.scores) as entropy | |
FROM ( | |
SELECT | |
image_id, | |
image, | |
explode(ML_PREDICT(class_scores, image)) AS detection |
Newer | Older