Skip to content

Instantly share code, notes, and snippets.

@bepuca
Last active June 14, 2022 15:19
Show Gist options
  • Save bepuca/b0d8656410b9b71e5685384068ccd321 to your computer and use it in GitHub Desktop.
Save bepuca/b0d8656410b9b71e5685384068ccd321 to your computer and use it in GitHub Desktop.
object detection error analysis blog - Load a COCO style dataset and convert it into a convenient pandas DataFrame
# Copyright © 2022 Bernat Puig Camps
import json
from pathlib import Path
from typing import Tuple
import pandas as pd
# Default directory that is expected to contain "annotations.json".
DATA_PATH = Path("./coco_val2017")


def load_dataset(
    data_path: Path = DATA_PATH,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Read the COCO style json dataset and transform it into convenient DataFrames

    The file ``<data_path>/annotations.json`` is parsed and its "images" and
    "annotations" lists are flattened into two DataFrames. Each annotation
    "bbox" is read as ``[xmin, ymin, width, height]`` and converted to corner
    coordinates.

    :param data_path: Directory holding "annotations.json". A plain string is
        accepted as well as a ``Path``.
    :return (images_df, targets_df):
        images_df: Columns "image_id" and "file_name"
        targets_df: Columns
            "target_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax"
            where "target_id" is the position of the annotation in the file
            (not the annotation's own "id") and "label_id" is its
            "category_id".
        When the corresponding json list is empty, an empty DataFrame with
        the same columns is returned instead of raising.
    :raises FileNotFoundError: If "annotations.json" does not exist.
    :raises KeyError: If the json lacks "images"/"annotations" or the records
        lack the fields used above.
    """
    image_columns = ["image_id", "file_name"]
    target_columns = [
        "target_id",
        "image_id",
        "label_id",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]

    # Path(...) makes str inputs work; the json is read explicitly as UTF-8 so
    # the result does not depend on the platform's default encoding.
    annotations_path = Path(data_path) / "annotations.json"
    with open(annotations_path, "r", encoding="utf-8") as f:
        targets_json = json.load(f)

    image_records = targets_json["images"]
    if image_records:
        images_df = pd.DataFrame.from_records(image_records)
        images_df = images_df.rename(columns={"id": "image_id"})
        images_df = images_df[image_columns]
    else:
        # A frame built from no records has no columns, so selecting them
        # would raise; build the empty frame with the expected schema.
        images_df = pd.DataFrame(columns=image_columns)

    annotation_records = targets_json["annotations"]
    if not annotation_records:
        return images_df, pd.DataFrame(columns=target_columns)

    targets_df = pd.DataFrame.from_records(annotation_records)

    # Expand every [xmin, ymin, w, h] list into its own columns, aligned on
    # the annotation rows.
    boxes = pd.DataFrame(
        targets_df["bbox"].tolist(),
        columns=["xmin", "ymin", "w", "h"],
        index=targets_df.index,
    )
    targets_df = targets_df.assign(
        xmin=boxes["xmin"],
        ymin=boxes["ymin"],
        xmax=boxes["xmin"] + boxes["w"],
        ymax=boxes["ymin"] + boxes["h"],
    )

    # The positional row index becomes the target identifier.
    targets_df = targets_df.reset_index()
    targets_df = targets_df.rename(
        columns={"index": "target_id", "category_id": "label_id"}
    )
    targets_df = targets_df[target_columns]

    return images_df, targets_df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment