# bepuca/get_obj_det_predictions.py

## get_obj_det_predictions.py
# Copyright © 2022 Bernat Puig Camps
from pathlib import Path

import pandas as pd
from PIL import Image
import torch
import torchvision


def get_predictions(
    images_path: Path, images_df: pd.DataFrame, targets_df: pd.DataFrame
) -> pd.DataFrame:
    """Get predictions and per-image losses of a pretrained Faster-RCNN.

    A torchvision ``fasterrcnn_resnet50_fpn`` model with pretrained weights
    is created inside this function, moved to the first CUDA device when one
    is available (CPU otherwise) and run on every image of `images_df`, one
    image at a time.

    :param images_path: Directory that the `file_name` values of `images_df`
        are joined to.
    :param images_df: DataFrame with images. The columns `image_id` and
        `file_name` are read.
    :param targets_df: DataFrame with ground truth target for images. The
        columns `image_id`, `label_id`, `xmin`, `ymin`, `xmax` and `ymax`
        are read.
    :return preds_df: DataFrame with one row per predicted box and columns
        [
            "pred_id", "image_id", "label_id", "xmin", "ymin", "xmax",
            "ymax", "score", "image_loss"
        ]
        When `images_df` has no rows, an empty DataFrame with these columns
        is returned and no model is created.
    """
    columns = [
        "pred_id",
        "image_id",
        "label_id",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
        "score",
        "image_loss",
    ]
    if len(images_df) == 0:
        # pd.concat raises ValueError on an empty list of frames, so answer
        # right away instead of loading the model for nothing.
        return pd.DataFrame(columns=columns)

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True
    )
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # The transform is stateless, build it once instead of once per image.
    to_tensor = torchvision.transforms.ToTensor()

    preds_dfs = []
    for sample in images_df.itertuples():
        # We iterate over single samples (batch size of 1) because we need one
        # loss per image and PyTorch Faster-RCNN outputs losses per batch,
        # not sample.
        t_df = targets_df.query("image_id == @sample.image_id")

        # Close the underlying file as soon as the pixels are converted.
        with Image.open(images_path / sample.file_name) as image_file:
            image = image_file.convert("RGB")
        image_tensor = to_tensor(image).to(device)

        bboxes = t_df[["xmin", "ymin", "xmax", "ymax"]].values
        labels = t_df["label_id"].values
        if bboxes.size == 0:
            # PyTorch Faster-RCNN expects targets to be tensors that fulfill
            # len(boxes.shape) == 2 & boxes.shape[-1] == 4. Build both empty
            # targets explicitly so the dtype of an empty selection (which
            # may be `object`) never reaches torch.as_tensor.
            bboxes = torch.empty(0, 4)
            labels = torch.empty(0, dtype=torch.int64)

        targets = {
            "boxes": torch.as_tensor(bboxes, dtype=torch.float32).to(device),
            "labels": torch.as_tensor(labels, dtype=torch.int64).to(device),
        }
        with torch.no_grad():
            # Faster-RCNN outputs losses only when train mode
            model.train()
            losses = model([image_tensor], [targets])
            # Faster-RCNN outputs predictions only when eval mode
            model.eval()
            pred = model([image_tensor])[0]
        # Unify all sublosses into one (this is just one way of doing it)
        loss = sum(losses.values()).item()

        # Explicit NumPy conversion so the DataFrame columns hold plain
        # numeric values instead of relying on pandas to unwrap tensors.
        boxes = pred["boxes"].cpu().numpy()
        preds_dfs.append(
            pd.DataFrame(
                {
                    "image_id": sample.image_id,
                    "image_loss": loss,
                    "label_id": pred["labels"].cpu().numpy(),
                    "score": pred["scores"].cpu().numpy(),
                    "xmin": boxes[:, 0],
                    "ymin": boxes[:, 1],
                    "xmax": boxes[:, 2],
                    "ymax": boxes[:, 3],
                }
            )
        )

    preds_df = pd.concat(preds_dfs, ignore_index=True)
    # The fresh 0..N-1 index doubles as a unique prediction identifier.
    preds_df = preds_df.reset_index().rename(columns={"index": "pred_id"})
    return preds_df[columns]
	# Copyright © 2022 Bernat Puig Camps
	from pathlib import Path

	import pandas as pd
	from PIL import Image
	import torch
	import torchvision


	def get_predictions(
		images_path: Path, images_df: pd.DataFrame, targets_df: pd.DataFrame
	) -> pd.DataFrame:
		"""Get predictions and per-image losses of a pretrained Faster-RCNN.

		A torchvision ``fasterrcnn_resnet50_fpn`` model with pretrained weights
		is created inside this function, moved to the first CUDA device when
		one is available (CPU otherwise) and run on every image of
		`images_df`, one image at a time.

		:param images_path: Directory that the `file_name` values of
			`images_df` are joined to.
		:param images_df: DataFrame with images. The columns `image_id` and
			`file_name` are read.
		:param targets_df: DataFrame with ground truth target for images. The
			columns `image_id`, `label_id`, `xmin`, `ymin`, `xmax` and `ymax`
			are read.
		:return preds_df: DataFrame with one row per predicted box and columns
			[
				"pred_id", "image_id", "label_id", "xmin", "ymin", "xmax",
				"ymax", "score", "image_loss"
			]
			When `images_df` has no rows, an empty DataFrame with these
			columns is returned and no model is created.
		"""
		columns = [
			"pred_id",
			"image_id",
			"label_id",
			"xmin",
			"ymin",
			"xmax",
			"ymax",
			"score",
			"image_loss",
		]
		if len(images_df) == 0:
			# pd.concat raises ValueError on an empty list of frames, so answer
			# right away instead of loading the model for nothing.
			return pd.DataFrame(columns=columns)

		model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
			pretrained=True
		)
		device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
		model = model.to(device)

		# The transform is stateless, build it once instead of once per image.
		to_tensor = torchvision.transforms.ToTensor()

		preds_dfs = []
		for sample in images_df.itertuples():
			# We iterate over single samples (batch size of 1) because we need
			# one loss per image and PyTorch Faster-RCNN outputs losses per
			# batch, not sample.
			t_df = targets_df.query("image_id == @sample.image_id")

			# Close the underlying file as soon as the pixels are converted.
			with Image.open(images_path / sample.file_name) as image_file:
				image = image_file.convert("RGB")
			image_tensor = to_tensor(image).to(device)

			bboxes = t_df[["xmin", "ymin", "xmax", "ymax"]].values
			labels = t_df["label_id"].values
			if bboxes.size == 0:
				# PyTorch Faster-RCNN expects targets to be tensors that
				# fulfill len(boxes.shape) == 2 & boxes.shape[-1] == 4. Build
				# both empty targets explicitly so the dtype of an empty
				# selection (which may be `object`) never reaches
				# torch.as_tensor.
				bboxes = torch.empty(0, 4)
				labels = torch.empty(0, dtype=torch.int64)

			targets = {
				"boxes": torch.as_tensor(bboxes, dtype=torch.float32).to(device),
				"labels": torch.as_tensor(labels, dtype=torch.int64).to(device),
			}
			with torch.no_grad():
				# Faster-RCNN outputs losses only when train mode
				model.train()
				losses = model([image_tensor], [targets])
				# Faster-RCNN outputs predictions only when eval mode
				model.eval()
				pred = model([image_tensor])[0]
			# Unify all sublosses into one (this is just one way of doing it)
			loss = sum(losses.values()).item()

			# Explicit NumPy conversion so the DataFrame columns hold plain
			# numeric values instead of relying on pandas to unwrap tensors.
			boxes = pred["boxes"].cpu().numpy()
			preds_dfs.append(
				pd.DataFrame(
					{
						"image_id": sample.image_id,
						"image_loss": loss,
						"label_id": pred["labels"].cpu().numpy(),
						"score": pred["scores"].cpu().numpy(),
						"xmin": boxes[:, 0],
						"ymin": boxes[:, 1],
						"xmax": boxes[:, 2],
						"ymax": boxes[:, 3],
					}
				)
			)

		preds_df = pd.concat(preds_dfs, ignore_index=True)
		# The fresh 0..N-1 index doubles as a unique prediction identifier.
		preds_df = preds_df.reset_index().rename(columns={"index": "pred_id"})
		return preds_df[columns]