# EckoTan0804/measure_performance.py

## measure_performance.py
import math
import torch
import numpy as np
from torchvision.models import resnet18, ResNet18_Weights # TODO: Import your model
from tqdm import tqdm
from rich import print

# ========== TODO: Adjust based on your needs ==========
# Model under test: torchvision ResNet-18 built with the IMAGENET1K_V1 weights.
# main() moves it to the selected device and switches it to eval mode.
MODEL = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

# Spatial size of the random dummy input; main() builds a tensor of shape
# [1, 3, HEIGHT, WIDTH].
HEIGHT = 1024
WIDTH = 2048

# Number of untimed forward passes run before measuring.
WARMUP_REPETITION = 100
# Number of timed forward passes; also the length of the durations buffer
# allocated by init_measurement().
MEASURE_REPETITION = 300
# ======================================================


def init_measurement():
    """Create the CUDA timing events and the buffer for per-run durations.

    Returns:
        tuple: ``(starter, ender, infer_durations)`` where ``starter`` and
        ``ender`` are ``torch.cuda.Event`` objects created with timing
        enabled, and ``infer_durations`` is a zero-filled NumPy array of
        shape ``(MEASURE_REPETITION, 1)``.
    """
    # One timing-enabled event for each end of the measured interval.
    starter, ender = (torch.cuda.Event(enable_timing=True) for _ in range(2))
    infer_durations = np.zeros(shape=(MEASURE_REPETITION, 1))
    return starter, ender, infer_durations


def get_measurement_stats(infer_durations):
    """Summarise the recorded inference durations.

    Args:
        infer_durations: Array-like holding one duration per measured run.

    Returns:
        tuple: ``(mean, std)`` over all values, as computed by ``np.mean``
        and ``np.std`` with their default arguments.
    """
    return np.mean(infer_durations), np.std(infer_durations)


def get_num_params(model):
    """Count the trainable parameters of ``model``.

    Args:
        model: Object exposing ``parameters()``; each yielded parameter must
            provide ``requires_grad`` and ``numel()``.

    Returns:
        int: Total element count over the parameters whose
        ``requires_grad`` is true. Frozen parameters are not counted.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total


def main():
    """Benchmark the inference latency of ``MODEL`` on the GPU and print a summary.

    Runs ``WARMUP_REPETITION`` untimed forward passes followed by
    ``MEASURE_REPETITION`` forward passes timed with CUDA events, all on a
    random input of shape ``[1, 3, HEIGHT, WIDTH]``. It then prints a dict
    with the device name and memory, the image size, the trainable-parameter
    count, the mean/std inference time in seconds and the resulting FPS.

    Raises:
        RuntimeError: If no CUDA device is available. Timing relies on
            ``torch.cuda.Event`` and the summary queries CUDA device
            properties, so the benchmark cannot run on the CPU.
    """
    # Everything below depends on torch.cuda; fail early with a clear message
    # instead of selecting "cpu" and crashing later inside a CUDA-only call.
    if not torch.cuda.is_available():
        raise RuntimeError(
            "CUDA is not available: this benchmark times inference with "
            "torch.cuda.Event and therefore requires a GPU."
        )

    device = torch.device("cuda")
    device_properties = torch.cuda.get_device_properties(device)
    device_memory = math.floor(device_properties.total_memory / 1e9)  # unit: GB

    starter, ender, infer_durations = init_measurement()

    model = MODEL.to(device)
    num_params = get_num_params(model)
    model.eval()

    # Mock input
    dummy_input = torch.randn([1, 3, HEIGHT, WIDTH], dtype=torch.float, device=device)

    with torch.no_grad():
        # GPU warm-up
        for _ in tqdm(range(WARMUP_REPETITION), desc="GPU warm-up", total=WARMUP_REPETITION):
            _ = model(dummy_input)

        # Let the queued warm-up work finish before the timed loop starts.
        torch.cuda.synchronize()

        for rep in tqdm(range(MEASURE_REPETITION), desc="Measuring inference time", total=MEASURE_REPETITION):
            starter.record()
            _ = model(dummy_input)
            ender.record()

            # elapsed_time() is only valid once both events have completed.
            torch.cuda.synchronize()

            # elapsed_time() reports milliseconds; store seconds.
            infer_durations[rep] = starter.elapsed_time(ender) / 1000

    duration_mean, duration_std = get_measurement_stats(infer_durations)

    summary = {
        "Device": f"{torch.cuda.get_device_name(device)} ({device_memory} GB)",
        "Image size (H, W)": (HEIGHT, WIDTH),
        "#Parameters": f"{num_params/1e6:.2f} M",
        "Inference time": f"Mean: {duration_mean:.3f}s, Std: {duration_std:.3f}s",
        "FPS": f"{1 / duration_mean:.3f}",
    }

    print(summary)


# Run the benchmark only when this file is executed as a script.
if __name__ == '__main__':
    main()
	import math
	import torch
	import numpy as np
	from torchvision.models import resnet18, ResNet18_Weights # TODO: Import your model
	from tqdm import tqdm
	from rich import print

	# ========== TODO: Adjust based on your needs ==========
	# Model under test: torchvision ResNet-18 built with the IMAGENET1K_V1 weights.
	# main() moves it to the selected device and switches it to eval mode.
	MODEL = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)

	# Spatial size of the random dummy input; main() builds a tensor of shape
	# [1, 3, HEIGHT, WIDTH].
	HEIGHT = 1024
	WIDTH = 2048

	# Number of untimed forward passes run before measuring.
	WARMUP_REPETITION = 100
	# Number of timed forward passes; also the length of the durations buffer
	# allocated by init_measurement().
	MEASURE_REPETITION = 300
	# ======================================================


	def init_measurement():
	    """Create the CUDA timing events and the buffer for per-run durations.

	    Returns:
	        tuple: ``(starter, ender, infer_durations)`` where ``starter`` and
	        ``ender`` are ``torch.cuda.Event`` objects created with timing
	        enabled, and ``infer_durations`` is a zero-filled NumPy array of
	        shape ``(MEASURE_REPETITION, 1)``.
	    """
	    starter = torch.cuda.Event(enable_timing=True)
	    ender = torch.cuda.Event(enable_timing=True)
	    infer_durations = np.zeros((MEASURE_REPETITION, 1))
	    return starter, ender, infer_durations


	def get_measurement_stats(infer_durations):
	    """Summarise the recorded inference durations.

	    Args:
	        infer_durations: Array-like holding one duration per measured run.

	    Returns:
	        tuple: ``(mean, std)`` over all values, as computed by ``np.mean``
	        and ``np.std`` with their default arguments.
	    """
	    duration_mean = np.mean(infer_durations)
	    duration_std = np.std(infer_durations)
	    return duration_mean, duration_std


	def get_num_params(model):
	    """Count the trainable parameters of ``model``.

	    Args:
	        model: Object exposing ``parameters()``; each yielded parameter must
	            provide ``requires_grad`` and ``numel()``.

	    Returns:
	        int: Total element count over the parameters whose
	        ``requires_grad`` is true. Frozen parameters are not counted.
	    """
	    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
	    return num_params


	def main():
	    """Benchmark the inference latency of ``MODEL`` on the GPU and print a summary.

	    Runs ``WARMUP_REPETITION`` untimed forward passes followed by
	    ``MEASURE_REPETITION`` forward passes timed with CUDA events, all on a
	    random input of shape ``[1, 3, HEIGHT, WIDTH]``. It then prints a dict
	    with the device name and memory, the image size, the trainable-parameter
	    count, the mean/std inference time in seconds and the resulting FPS.

	    Raises:
	        RuntimeError: If no CUDA device is available. Timing relies on
	            ``torch.cuda.Event`` and the summary queries CUDA device
	            properties, so the benchmark cannot run on the CPU.
	    """
	    # Everything below depends on torch.cuda; fail early with a clear message
	    # instead of selecting "cpu" and crashing later inside a CUDA-only call.
	    if not torch.cuda.is_available():
	        raise RuntimeError(
	            "CUDA is not available: this benchmark times inference with "
	            "torch.cuda.Event and therefore requires a GPU."
	        )

	    device = torch.device("cuda")
	    device_properties = torch.cuda.get_device_properties(device)
	    device_memory = math.floor(device_properties.total_memory / 1e9)  # unit: GB

	    starter, ender, infer_durations = init_measurement()

	    model = MODEL.to(device)
	    num_params = get_num_params(model)
	    model.eval()

	    # Mock input
	    dummy_input = torch.randn([1, 3, HEIGHT, WIDTH], dtype=torch.float, device=device)

	    with torch.no_grad():
	        # GPU warm-up
	        for _ in tqdm(range(WARMUP_REPETITION), desc="GPU warm-up", total=WARMUP_REPETITION):
	            _ = model(dummy_input)

	        # Let the queued warm-up work finish before the timed loop starts.
	        torch.cuda.synchronize()

	        for rep in tqdm(range(MEASURE_REPETITION), desc="Measuring inference time", total=MEASURE_REPETITION):
	            starter.record()
	            _ = model(dummy_input)
	            ender.record()

	            # elapsed_time() is only valid once both events have completed.
	            torch.cuda.synchronize()

	            # elapsed_time() reports milliseconds; store seconds.
	            infer_durations[rep] = starter.elapsed_time(ender) / 1000

	    duration_mean, duration_std = get_measurement_stats(infer_durations)

	    summary = {
	        "Device": f"{torch.cuda.get_device_name(device)} ({device_memory} GB)",
	        "Image size (H, W)": (HEIGHT, WIDTH),
	        "#Parameters": f"{num_params/1e6:.2f} M",
	        "Inference time": f"Mean: {duration_mean:.3f}s, Std: {duration_std:.3f}s",
	        "FPS": f"{1 / duration_mean:.3f}",
	    }

	    print(summary)


	# Run the benchmark only when this file is executed as a script.
	if __name__ == '__main__':
	    main()