Last active
February 10, 2023 22:33
-
-
Save EckoTan0804/b1236473ea325c61ecbd4c003561ad5e to your computer and use it in GitHub Desktop.
Measure runtime performance of PyTorch model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
import torch | |
import numpy as np | |
from torchvision.models import resnet18, ResNet18_Weights # TODO: Import your model | |
from tqdm import tqdm | |
from rich import print | |
# ========== TODO: Adjust based on your needs ==========
# Network under test; it is instantiated when this module is loaded.
MODEL = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
# Height and width of the mock input image fed to the model.
HEIGHT = 1024
WIDTH = 2048
# Number of untimed forward passes executed before measuring.
WARMUP_REPETITION = 100
# Number of timed forward passes the statistics are computed over.
MEASURE_REPETITION = 300
# ======================================================
def init_measurement():
    """Create the CUDA timing events and the buffer for per-run durations.

    Returns:
        A ``(starter, ender, infer_durations)`` tuple: two
        ``torch.cuda.Event`` objects created with ``enable_timing=True`` and
        a zero-filled NumPy array of shape ``(MEASURE_REPETITION, 1)``
        providing one slot per measured run.
    """
    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)
    infer_durations = np.zeros((MEASURE_REPETITION, 1))
    return starter, ender, infer_durations
def get_measurement_stats(infer_durations):
    """Return the mean and standard deviation of the recorded durations.

    Args:
        infer_durations: Array-like of per-run durations. The statistics are
            taken over all elements, whatever the shape.

    Returns:
        A ``(duration_mean, duration_std)`` tuple, in the same unit as the
        input. The standard deviation is NumPy's default (population) one.
    """
    duration_mean = np.mean(infer_durations)
    duration_std = np.std(infer_durations)
    return duration_mean, duration_std
def get_num_params(model):
    """Count the trainable parameters of ``model``.

    Args:
        model: Object exposing ``parameters()``, e.g. a ``torch.nn.Module``.

    Returns:
        The total number of elements across all parameters whose
        ``requires_grad`` flag is set; frozen parameters are not counted.
    """
    return sum(p.numel() for p in model.parameters() if p.requires_grad)
def main():
    """Benchmark ``MODEL`` on a CUDA device and print a result summary.

    Runs ``WARMUP_REPETITION`` untimed forward passes on a random
    ``1 x 3 x HEIGHT x WIDTH`` input, then times ``MEASURE_REPETITION``
    forward passes with CUDA events and prints the device name and memory,
    image size, trainable parameter count, mean/std inference time in
    seconds, and the resulting frames per second.

    Raises:
        RuntimeError: If no CUDA device is available.
    """
    # The device queries, CUDA events and synchronisation used below all
    # require CUDA, so fail early with a clear message rather than selecting
    # the CPU and crashing later with an obscure error.
    if not torch.cuda.is_available():
        raise RuntimeError(
            "CUDA is not available: this benchmark times inference with "
            "CUDA events and requires a GPU."
        )
    device = torch.device("cuda")

    device_properties = torch.cuda.get_device_properties(device)
    device_memory = math.floor(device_properties.total_memory / 1e9)  # unit: GB

    starter, ender, infer_durations = init_measurement()

    model = MODEL.to(device)
    num_params = get_num_params(model)
    model.eval()

    # Mock input
    dummy_input = torch.randn([1, 3, HEIGHT, WIDTH], dtype=torch.float, device=device)

    with torch.no_grad():
        # GPU warm-up
        for _ in tqdm(range(WARMUP_REPETITION), desc="GPU warm-up", total=WARMUP_REPETITION):
            _ = model(dummy_input)

        for rep in tqdm(range(MEASURE_REPETITION), desc="Measuring inference time", total=MEASURE_REPETITION):
            starter.record()
            _ = model(dummy_input)
            ender.record()

            # Wait for GPU sync so that both events have completed before
            # the elapsed time is read.
            torch.cuda.synchronize()

            curr_time = starter.elapsed_time(ender)  # time unit is milliseconds
            infer_durations[rep] = curr_time / 1000  # ms -> s

    duration_mean, duration_std = get_measurement_stats(infer_durations)

    summary = {
        "Device": f"{torch.cuda.get_device_name(device)} ({device_memory} GB)",
        "Image size (H, W)": (HEIGHT, WIDTH),
        "#Parameters": f"{num_params/1e6:.2f} M",
        "Inference time": f"Mean: {duration_mean:.3f}s, Std: {duration_std:.3f}s",
        "FPS": f"{1 / duration_mean:.3f}",
    }
    print(summary)
# Run the benchmark only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment