Skip to content

Instantly share code, notes, and snippets.

@EckoTan0804
Last active February 10, 2023 22:33
Show Gist options
  • Save EckoTan0804/b1236473ea325c61ecbd4c003561ad5e to your computer and use it in GitHub Desktop.
Save EckoTan0804/b1236473ea325c61ecbd4c003561ad5e to your computer and use it in GitHub Desktop.
Measure runtime performance of PyTorch model
import math
import torch
import numpy as np
from torchvision.models import resnet18, ResNet18_Weights # TODO: Import your model
from tqdm import tqdm
from rich import print
# ========== TODO: Adjust based on your needs ==========
# Model under test: torchvision ResNet-18 built with the IMAGENET1K_V1 weights.
MODEL = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# Height and width of the random input fed to the model ([1, 3, HEIGHT, WIDTH]).
HEIGHT = 1024
WIDTH = 2048
# Number of untimed forward passes executed before measuring.
WARMUP_REPETITION = 100
# Number of timed forward passes; also the length of the durations array.
MEASURE_REPETITION = 300
# ======================================================
def init_measurement():
    """Create the timing primitives used by the measurement loop.

    Returns:
        A 3-tuple ``(start_event, end_event, durations)``: two
        ``torch.cuda.Event`` objects created with ``enable_timing=True`` and
        a zero-filled NumPy array of shape ``(MEASURE_REPETITION, 1)`` that
        receives one duration per measured repetition.
    """
    start_event = torch.cuda.Event(enable_timing=True)
    end_event = torch.cuda.Event(enable_timing=True)
    durations = np.zeros((MEASURE_REPETITION, 1))
    return start_event, end_event, durations
def get_measurement_stats(infer_durations):
    """Summarize the recorded inference durations.

    Args:
        infer_durations: Array-like of per-repetition durations.

    Returns:
        A ``(mean, std)`` tuple computed over all elements with
        ``np.mean`` and ``np.std``.
    """
    return np.mean(infer_durations), np.std(infer_durations)
def get_num_params(model):
    """Count the trainable parameters of ``model``.

    Args:
        model: Object exposing ``parameters()`` that yields items with a
            ``requires_grad`` attribute and a ``numel()`` method.

    Returns:
        The sum of ``numel()`` over every parameter whose ``requires_grad``
        is truthy (``0`` when there are none).
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
def main():
    """Benchmark ``MODEL`` inference latency on a CUDA GPU and print a summary.

    Moves ``MODEL`` to the GPU in eval mode, runs ``WARMUP_REPETITION``
    untimed forward passes on a random ``[1, 3, HEIGHT, WIDTH]`` float input,
    then times ``MEASURE_REPETITION`` forward passes with CUDA events. The
    printed summary holds the device name and memory, the input size, the
    trainable-parameter count, the mean/std latency in seconds and the FPS.

    Raises:
        RuntimeError: If no CUDA device is available.
    """
    # Device properties, CUDA events and torch.cuda.synchronize() below are
    # all CUDA-only, so fail up front with a clear message instead of
    # falling back to "cpu" and crashing inside torch.cuda.* later.
    if not torch.cuda.is_available():
        raise RuntimeError(
            "CUDA is not available: this benchmark times inference with "
            "CUDA events and requires a GPU."
        )
    device = torch.device("cuda")

    device_properties = torch.cuda.get_device_properties(device)
    device_memory = math.floor(device_properties.total_memory / 1e9)  # unit: GB

    starter, ender, infer_durations = init_measurement()

    model = MODEL.to(device)
    num_params = get_num_params(model)
    model.eval()

    # Mock input
    dummy_input = torch.randn([1, 3, HEIGHT, WIDTH], dtype=torch.float, device=device)

    with torch.no_grad():
        # GPU warm-up: untimed passes before any measurement is taken.
        for _ in tqdm(range(WARMUP_REPETITION), desc="GPU warm-up", total=WARMUP_REPETITION):
            _ = model(dummy_input)

        for rep in tqdm(range(MEASURE_REPETITION), desc="Measuring inference time", total=MEASURE_REPETITION):
            starter.record()
            _ = model(dummy_input)
            ender.record()

            # Wait for GPU sync so both events have completed before reading
            # the elapsed time.
            torch.cuda.synchronize()

            curr_time = starter.elapsed_time(ender)  # time unit is milliseconds
            infer_durations[rep] = curr_time / 1000  # ms -> s

    duration_mean, duration_std = get_measurement_stats(infer_durations)

    summary = {
        "Device": f"{torch.cuda.get_device_name(device)} ({device_memory} GB)",
        "Image size (H, W)": (HEIGHT, WIDTH),
        "#Parameters": f"{num_params/1e6:.2f} M",
        "Inference time": f"Mean: {duration_mean:.3f}s, Std: {duration_std:.3f}s",
        "FPS": f"{1 / duration_mean:.3f}",
    }
    print(summary)
# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment