Skip to content

Instantly share code, notes, and snippets.

@tibaes
Last active January 4, 2024 21:40
Show Gist options
  • Save tibaes/97f07450038eb470b15a27748e7123f9 to your computer and use it in GitHub Desktop.
yolov8 object segmentation
#!/bin/bash
# Assuming AWS Sagemaker conda_pytorch_p310 environment
# System libraries needed by fiftyone's bundled MongoDB on Amazon Linux (RHEL7-based).
sudo yum install -y openssl-devel openssl11-libs libcurl
# Refresh the packaging toolchain before installing wheels.
pip install --upgrade pip setuptools wheel
pip install fiftyone
# Swap in the RHEL7-compatible fiftyone database build (replaces the default db package).
pip install fiftyone-db-rhel7 --force-reinstall
# shapely: polygon simplification; polars: fast label-table processing.
pip install shapely polars
# choose your preferred path to download the dataset
# a folder named open-images-v7 will be created automatically inside of it
dataset_path = '/home/ec2-user/SageMaker/dataset'
import os
import torch
import torchvision
import fiftyone as fo
# Use torch as the backend for any model-assisted fiftyone operations.
fo.config.default_ml_backend = "torch"
# Zoo datasets (open-images-v7 below) are downloaded under this directory.
fo.config.dataset_zoo_dir = dataset_path
def download_dataset(split, classes, max_samples=None):
    """Download (or load from cache) an Open Images V7 slice with segmentation labels.

    split: one of 'train' / 'validation' / 'test'.
    classes: list of class names to pull.
    max_samples: optional cap on sample count; None means no cap.
    """
    print(f'>> Split: {split}, classes: {classes}, max_samples: {max_samples}')
    dataset = fo.zoo.load_zoo_dataset(
        "open-images-v7",
        label_types=["segmentations"],
        drop_existing_dataset=False,
        split=split,
        classes=classes,
        max_samples=max_samples,
    )
    return dataset
# Fraction of each class budget assigned to every split.
target_split = {'train': 0.7, 'validation': 0.2, 'test': 0.1}
# Per-class sample budget; None downloads everything available.
target_classes = {
    "Person": 1_000,
    "Car": None
}
for cls_name, total in target_classes.items():
    for split_name, split_pct in target_split.items():
        budget = None if total is None else int(total * split_pct)
        download_dataset(split=split_name, classes=[cls_name], max_samples=budget)
# Source root (Open Images download) and destination root (YOLO-format export).
base_path = '/home/ec2-user/SageMaker/dataset'
target_path = '/home/ec2-user/SageMaker/dataset-yolo'
# a list with same keys as on fetch.py
target_classes = [
    "Person",
    "Car"
]
import os
import cv2
import yaml
import shutil
import pandas as pd
import polars as pl
import multiprocessing
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
from shapely.geometry import Polygon
from matplotlib import pyplot as plt
### Mask to Poly ###
def mask_to_polygon(mask_path):
    """Vectorize a binary mask image into simplified, normalized shapely polygons.

    mask_path: path to a grayscale mask image.
    Returns a list of shapely Polygons with coordinates normalized to [0, 1]
    (x by image width, y by image height), simplified with tolerance 0.002.
    Raises FileNotFoundError if the mask cannot be read.
    """
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        # cv2.imread returns None (instead of raising) on a missing/corrupt
        # file; fail loudly here rather than crash in findContours.
        raise FileNotFoundError(mask_path)
    contours, _ = cv2.findContours(
        mask,
        cv2.RETR_EXTERNAL,
        cv2.CHAIN_APPROX_SIMPLE
    )
    polygons = []
    for contour in contours:
        polygon = contour.reshape(-1, 2)
        if len(polygon) < 3:
            # Tiny specks produce 1- or 2-point contours, which are not valid
            # polygons and would make shapely's Polygon constructor raise.
            continue
        polygon_norm = polygon.astype(float)
        polygon_norm[:, 0] /= mask.shape[1]  # X normalized by image width
        polygon_norm[:, 1] /= mask.shape[0]  # Y normalized by image height
        polygon_norm = np.round(polygon_norm, 4)
        polygon_shapely = Polygon(polygon_norm)
        polygon_simplified = polygon_shapely.simplify(0.002, preserve_topology=True)
        polygons.append(polygon_simplified)
    return polygons
def polygon_to_yolo(polygon):
    """Flatten a shapely polygon's exterior ring into [x1, y1, x2, y2, ...]."""
    xs, ys = polygon.exterior.coords.xy
    return [coord for point in zip(xs, ys) for coord in point]
def polygon_to_mask(polygon, shape):
    """Rasterize a normalized shapely polygon back into a uint8 mask.

    polygon: shapely polygon with coordinates normalized to [0, 1].
    shape: (height, width) of the output mask.
    Returns a uint8 array with the polygon interior filled with 255.
    """
    mk = np.zeros(shape, dtype=np.uint8)
    x, y = polygon.exterior.coords.xy
    xy = [
        [int(xx * shape[1]), int(yy * shape[0])]
        for xx, yy in zip(x, y)
    ]
    # BUG FIX: fillConvexPoly mis-renders concave contours, which are common
    # for object segmentation masks; fillPoly handles arbitrary simple polygons.
    cv2.fillPoly(mk, [np.array(xy, dtype='int32')], color=255)
    return mk
### loading openimagesv7 labels ###
# Map each Open Images label URI (e.g. '/m/01g317') to its human-readable
# name, keeping only the classes we actually exported.
class_list_filepath = os.path.join(base_path, 'train/metadata/classes.csv')
class_df = pd.read_csv(class_list_filepath, header=None, names=['URI', 'ClassName'])
class_map_r = {
    uri: name
    for uri, name in zip(class_df.URI, class_df.ClassName)
    if name in target_classes
}
# convert from openimagev7 label hash to an integer id (0, 1, ...)
class_map = {uri: idx for idx, uri in enumerate(class_map_r)}
# class_map = {
#     '/m/01g317': 0,  # 'Person'
#     '/m/0k4j': 1,    # 'Car'
# }
def get_image_file_names(directory):
    """Return the set of base filenames (extension stripped) of images in *directory*.

    Matching is case-insensitive, so e.g. 'IMG_1.JPG' is included.
    """
    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.bmp'}  # Add more extensions if needed
    image_file_names = set()
    for filename in os.listdir(directory):
        nm, ext = os.path.splitext(filename)
        # BUG FIX: comparison was case-sensitive, silently dropping files with
        # uppercase extensions ('.JPG', '.PNG').
        if ext.lower() in image_extensions:
            image_file_names.add(nm)
    return image_file_names
def load_labels(split_name):
    """Load the segmentation label table for one split and resolve mask paths and ids.

    Keeps only rows whose image was actually downloaded and whose label is one
    of the target classes. Adds columns:
      - MaskFullPath: absolute path to the per-instance mask image
      - LabelID: integer class id from class_map
    """
    df = pl.read_csv(os.path.join(base_path, split_name, 'labels/segmentations.csv'))
    df = df[['MaskPath', 'ImageID', 'LabelName']]
    image_ids = get_image_file_names(os.path.join(base_path, split_name, 'data'))
    df = df.filter(pl.col('ImageID').is_in(image_ids))
    target_ids = set(class_map.keys())
    df = df.filter(pl.col('LabelName').is_in(target_ids))
    # Masks are sharded into subdirectories named after the first character of the file name.
    df = df.with_columns(pl.col('MaskPath').map_elements(lambda x: x[0].upper()).alias('Subdir'))
    # BUG FIX: a '/' was missing between base_path and the split name, which
    # produced paths like '.../datasettrain/labels/...' (base_path has no
    # trailing slash — see the os.path.join usage above).
    df = df.with_columns((base_path + f'/{split_name}/labels/masks/' + pl.col('Subdir') + '/' + pl.col('MaskPath')).alias('MaskFullPath'))
    df = df.with_columns(pl.col(['LabelName']).map_dict(class_map).alias('LabelID'))
    return df
# Build the filtered label tables (downloaded images only) for every split.
train_df = load_labels('train')
valid_df = load_labels('validation')
test_df = load_labels('test')
### convert mask labels to yolo polygon points
def macro_mask2yolopoly(p):
    """Best-effort conversion of a mask file to a flat YOLO polygon list.

    Returns the flattened [x1, y1, x2, y2, ...] coords of the first (outer)
    polygon, or [] when the mask is unreadable or yields no usable polygon.
    """
    try:
        poly = mask_to_polygon(p)
        return polygon_to_yolo(poly[0])
    except Exception:
        # BUG FIX: narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit still propagate; any conversion failure means "no polygon".
        # (Also removed a trailing unreachable `return []`.)
        return []
def conv_mask_xy(df):
    """Add an 'XY' column holding flattened YOLO polygon coords for each mask."""
    xy_column = pl.col('MaskFullPath').map_elements(macro_mask2yolopoly).alias('XY')
    return df.with_columns(xy_column)
# Attach YOLO polygon coordinates to every labelled mask row.
train_df = conv_mask_xy(train_df)
valid_df = conv_mask_xy(valid_df)
test_df = conv_mask_xy(test_df)
def write_yolo_labels(df, subset, persistence=True):
    """Group polygon rows per image and write one YOLO-seg label file per image.

    Each line of a label file is '<class_id> x1 y1 x2 y2 ...'.
    persistence: when True, create the subset directory and write the files;
    when False, only build and return the grouped table.
    Returns the grouped dataframe (one row per image).
    """
    # Drop rows whose mask produced no usable polygon.
    df = df.filter(pl.col('XY').map_elements(len) > 0)
    df = df.with_columns(
        pl.col('XY').map_elements(lambda xy: xy.map_elements(lambda e: str(e))).list.join(' ').alias('TXY'))
    df = df.with_columns(
        (pl.col('LabelID').cast(pl.Utf8) + ' ' + pl.col('TXY')).alias('Sample'))
    g = df.group_by('ImageID').agg(['Sample'])
    g = g.with_columns(pl.col('Sample').list.join('\n').alias('StrSamples'))
    # BUG FIX: a '/' was missing between target_path and subset, so label files
    # were addressed under e.g. '.../dataset-yolotrain/' while makedirs below
    # created '.../dataset-yolo/train/'.
    g = g.with_columns((target_path + '/' + subset + '/' + pl.col('ImageID') + '.txt').alias('Path'))
    if persistence:
        os.makedirs(os.path.join(target_path, subset), exist_ok=True)
        for row in g.iter_rows(named=True):
            with open(row['Path'], 'w') as f:
                f.write(row['StrSamples'])
    return g
# Write YOLO-format label files; keep the grouped (one-row-per-image) frames.
train_df = write_yolo_labels(train_df, 'train')
valid_df = write_yolo_labels(valid_df, 'validation')
test_df = write_yolo_labels(test_df, 'test')
def copy_data(df, subset):
    """Copy each labelled image from the source split folder into the YOLO dataset.

    Missing or unreadable source images are skipped (best-effort), matching the
    subset of images whose label files were successfully written.
    """
    # Destination directory is loop-invariant; compute it once.
    dst = os.path.join(target_path, subset)
    for iid in df.select(pl.col('ImageID')).get_columns()[0].to_list():
        src = os.path.join(base_path, subset, "data", f"{iid}.jpg")
        try:
            shutil.copy2(src, dst)
        except OSError:
            # BUG FIX: narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt); only I/O failures such as a missing source
            # file are treated as skippable.
            continue
# Copy each split's images next to their freshly written label files.
copy_data(valid_df, 'validation')
copy_data(test_df, 'test')
copy_data(train_df, 'train')
from pathlib import Path

# BUG FIX: the config previously hard-coded a different dataset root
# ('/home/ec2-user/SageMaker/SegVehicleYolo') and declared a single class
# ('0: vehicle') even though two classes (Person, Car) are exported above.
# NOTE(review): class ids here follow the order of target_classes, which is
# assumed to match class_map's enumeration order — confirm against classes.csv.
names_block = '\n'.join(f'  {i}: {name}' for i, name in enumerate(target_classes))
yaml_content = f'''
path: {target_path}
train: train
val: validation
test: test

# Classes
names:
{names_block}
'''
with Path(os.path.join(target_path, 'seg_vehicle.yaml')).open('w') as f:
    f.write(yaml_content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment