Skip to content

Instantly share code, notes, and snippets.

@bepuca
Last active June 14, 2022 15:20
Show Gist options
  • Save bepuca/6094d73509f37000c8bea2574ddf32e8 to your computer and use it in GitHub Desktop.
Save bepuca/6094d73509f37000c8bea2574ddf32e8 to your computer and use it in GitHub Desktop.
object detection error analysis blog - Get predictions and losses of a Faster RCNN model
# Copyright © 2022 Bernat Puig Camps
from pathlib import Path
import pandas as pd
from PIL import Image
import torch
import torchvision
def get_predictions(
    images_path: Path, images_df: pd.DataFrame, targets_df: pd.DataFrame
) -> pd.DataFrame:
    """Get predictions and losses of a Faster-RCNN for all images in `images_df`

    A torchvision ``fasterrcnn_resnet50_fpn`` model is created with
    ``pretrained=True`` and run on every row of `images_df`, one image at a
    time, on the first CUDA device if one is available and on CPU otherwise.

    :param images_path: Directory the ``file_name`` values of `images_df` are
        joined to in order to open each image.
    :param images_df: DataFrame with images. The columns ``image_id`` and
        ``file_name`` are read.
    :param targets_df: DataFrame with ground truth target for images. The
        columns ``image_id``, ``label_id``, ``xmin``, ``ymin``, ``xmax`` and
        ``ymax`` are read.
    :return preds_df: DataFrame with one row per predicted box and columns
        [
            "pred_id", "image_id", "label_id", "xmin", "ymin", "xmax", "ymax",
            "score", "image_loss"
        ]
        ``image_loss`` is the sum of all sublosses of the image the box
        belongs to. The frame has no rows when `images_df` has no rows.
    """
    columns = [
        "pred_id",
        "image_id",
        "label_id",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
        "score",
        "image_loss",
    ]
    if len(images_df) == 0:
        # pd.concat raises ValueError on an empty list; bail out before
        # paying for the model load when there is nothing to predict.
        return pd.DataFrame(columns=columns)

    # NOTE(review): newer torchvision releases appear to favour a `weights=`
    # argument over `pretrained=True`; kept as is for compatibility - confirm
    # against the installed torchvision version.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True
    )
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    # The transform is stateless, so build it once instead of per image.
    to_tensor = torchvision.transforms.ToTensor()

    preds_dfs = []
    for sample in images_df.itertuples():
        # We iterate over single samples (batch size of 1) because we need one
        # loss per image and PyTorch Faster-RCNN outputs losses per batch,
        # not sample.
        t_df = targets_df[targets_df["image_id"] == sample.image_id]

        # `convert` returns a fully loaded copy, so the underlying file can be
        # closed right away instead of lingering until garbage collection.
        with Image.open(images_path / sample.file_name) as image_file:
            image = image_file.convert("RGB")
        image_tensor = to_tensor(image).to(device)

        bboxes = t_df[["xmin", "ymin", "xmax", "ymax"]].values
        labels = t_df["label_id"].values
        if bboxes.size == 0:
            # PyTorch Faster-RCNN expects targets to be tensors that fulfill
            # len(boxes.shape) == 2 & boxes.shape[-1] == 4
            bboxes = torch.empty(0, 4)
        targets = {
            "boxes": torch.as_tensor(bboxes, dtype=torch.float32).to(device),
            "labels": torch.as_tensor(labels, dtype=torch.int64).to(device),
        }

        with torch.no_grad():
            # Faster-RCNN outputs losses only when train mode
            model.train()
            losses = model([image_tensor], [targets])
            # Faster-RCNN outputs predictions only when eval mode
            model.eval()
            preds = model([image_tensor])

        # Unify all sublosses into one (this is just one way of doing it)
        loss = sum(losses.values()).item()

        # Hand pandas plain NumPy arrays rather than torch tensors so the
        # resulting column dtypes do not depend on implicit tensor coercion.
        pred = preds[0]
        boxes = pred["boxes"].detach().cpu().numpy()
        preds_dfs.append(
            pd.DataFrame(
                {
                    "image_id": sample.image_id,
                    "image_loss": loss,
                    "label_id": pred["labels"].detach().cpu().numpy(),
                    "score": pred["scores"].detach().cpu().numpy(),
                    "xmin": boxes[:, 0],
                    "ymin": boxes[:, 1],
                    "xmax": boxes[:, 2],
                    "ymax": boxes[:, 3],
                }
            )
        )

    preds_df = pd.concat(preds_dfs, ignore_index=True)
    # The fresh 0..n-1 index doubles as a unique id for every prediction.
    preds_df = preds_df.reset_index().rename(columns={"index": "pred_id"})
    return preds_df[columns]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment