"""Benchmark PyTorch matrix multiplication across available devices."""

import platform
import time
from typing import List, Tuple

import torch


def get_available_devices() -> List[str]:
    """Return the list of devices available for PyTorch computation.

    Returns:
        List[str]: Available devices ('cpu', 'cuda', 'mps'). CUDA is
            available on NVIDIA GPUs, MPS on Apple Silicon.
    """
    devices = ['cpu']  # CPU is always available
    # Check for NVIDIA GPU support
    if torch.cuda.is_available():
        devices.append('cuda')
    # Check for Apple Silicon (M1/M2/M3) GPU support
    if torch.backends.mps.is_available():
        devices.append('mps')
    return devices
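# Example: on an Apple Silicon Mac, get_available_devices() typically returns
# ['cpu', 'mps']; on a Linux machine with an NVIDIA GPU, ['cpu', 'cuda'].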
def create_tensors(size: Tuple[int, int], device: str) -> Tuple[torch.Tensor, torch.Tensor]:
    """Create random tensors of the specified size on the given device.

    Args:
        size (Tuple[int, int]): Dimensions of the tensors to create (rows, cols).
        device (str): Device to place tensors on ('cpu', 'cuda', or 'mps').

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: Two random tensors of the specified size.
    """
    return (
        torch.randn(size, device=device),
        torch.randn(size, device=device),
    )
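# Note: torch.randn returns float32 tensors under the default dtype, which all
# three backends support; the MPS backend does not support float64, so keep
# the default dtype when benchmarking on Apple Silicon.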
def benchmark_matmul(size: Tuple[int, int], device: str, num_iterations: int = 100) -> float:
    """Benchmark matrix multiplication for a given size and device.

    Args:
        size (Tuple[int, int]): Size of the matrices to multiply.
        device (str): Device to run the benchmark on.
        num_iterations (int): Number of iterations to average over.

    Returns:
        float: Average time per operation in seconds.
    """
    # Create input tensors on the specified device
    a, b = create_tensors(size, device)

    # Perform warmup iterations so the GPU reaches full speed and the timing
    # excludes one-off compilation/optimization costs
    for _ in range(10):
        _ = torch.matmul(a, b)

    # Ensure all queued operations are completed before timing;
    # each GPU backend has its own synchronization call
    if device == 'cuda':
        torch.cuda.synchronize()
    elif device == 'mps':
        torch.mps.synchronize()

    # Time the actual benchmark iterations
    start_time = time.perf_counter()
    for _ in range(num_iterations):
        _ = torch.matmul(a, b)

    # Synchronize again so the timer stops only after all work has finished
    if device == 'cuda':
        torch.cuda.synchronize()
    elif device == 'mps':
        torch.mps.synchronize()
    end_time = time.perf_counter()

    # Return the average time per operation
    return (end_time - start_time) / num_iterations
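# Illustrative helper (an addition, not part of the original gist): convert a
# measured time into throughput. A square n x n matmul performs roughly
# 2 * n**3 floating-point operations (n**2 output elements, each an n-term
# multiply-add), so throughput follows directly from the averaged time.
def matmul_gflops(size: Tuple[int, int], seconds: float) -> float:
    """Approximate throughput in GFLOP/s for a square matrix multiplication."""
    n = size[0]
    return (2 * n ** 3) / seconds / 1e9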
def run_benchmarks():
    """Run benchmarks across all available devices.

    This function:
    1. Detects available devices
    2. Tests multiple matrix sizes
    3. Reports timing results in a formatted table
    4. Handles errors gracefully
    """
    # Get the list of available compute devices
    devices = get_available_devices()

    # Matrix sizes to test; larger sizes show more pronounced
    # differences between devices
    sizes = [
        (1000, 1000),  # 1K x 1K
        (2000, 2000),  # 2K x 2K
        (4000, 4000),  # 4K x 4K
    ]
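    # Rough memory check: a 4000x4000 float32 tensor is 4000 * 4000 * 4 bytes
    # = 64 MB, so two inputs plus the result fit comfortably on most GPUs.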
    # Print system information and available devices
    print(f"PyTorch version: {torch.__version__}")
    print(f"System: {platform.system()} {platform.machine()}\n")
    print("Available devices:", devices)
    print("\nMatrix Multiplication Benchmark Results (seconds per operation):\n")

    # Build and print the table header
    header = "Size".ljust(15)
    for device in devices:
        header += device.upper().ljust(15)
    print(header)
    print("-" * (15 + 15 * len(devices)))

    # Run benchmarks for each matrix size
    for size in sizes:
        row = f"{size[0]}x{size[1]}".ljust(15)
        # Test each available device
        for device in devices:
            try:
                time_taken = benchmark_matmul(size, device)
                row += f"{time_taken:.6f}".ljust(15)
            except Exception as e:
                # Handle errors (out of memory, unsupported device, etc.);
                # truncate the message so the cell fits the 15-char column
                row += f"Err: {str(e)[:9]}".ljust(15)
        print(row)


if __name__ == "__main__":
    run_benchmarks()
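# Example invocation (assuming the file is saved as matmul_benchmark.py):
#   python matmul_benchmark.py
# Absolute timings vary with hardware, PyTorch build, and matrix size; the GPU
# columns ('cuda' or 'mps') typically pull ahead of 'cpu' as the matrices grow.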