# mehdidc/plot.py

## plot.py
import matplotlib as mpl
mpl.use('Agg')
import argparse
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

def plot_scaling_and_efficiency(df):
    """
    Render scaling and efficiency plots

    - Scaling plot is the relationship between the number of GPUs used
      and the speedup in the throughput
    - Efficiency is the ratio of throughput to ideal reference throughput

    The run with the fewest GPUs is the reference: K_ref is its GPU count
    and T(K_ref) its throughput. For a run using K GPUs:

    - speedup(K)       = T(K) / T(K_ref)
    - ideal speedup(K) = K / K_ref
    - efficiency(K)    = T(K) / ((K / K_ref) * T(K_ref)), drawn as a percentage

    Args:
        df: DataFrame with one row per run and the columns ``NODES``,
            ``GPUS_PER_NODE`` and ``TOTAL_IMAGES_PER_SEC``. A copy is taken,
            so the caller's frame is not modified.

    Raises:
        ValueError: if ``df`` has no rows (there is no reference run).
    """
    if len(df) == 0:
        raise ValueError(
            "cannot plot scaling and efficiency: no runs available to use as reference"
        )

    df = df.copy()
    # Number of GPUs per run
    df["GPUs"] = df["NODES"] * df["GPUS_PER_NODE"]
    df = df.sort_values(by="GPUs")

    # After sorting, the first row is the run with the minimal number of GPUs;
    # it provides both the reference GPU count and the reference throughput.
    reference_run = df.iloc[0]
    reference_nb_gpus = reference_run["GPUs"]
    reference_throughput = reference_run["TOTAL_IMAGES_PER_SEC"]

    ideal_speedup = df["GPUs"] / reference_nb_gpus
    df["Speedup"] = df["TOTAL_IMAGES_PER_SEC"] / reference_throughput
    # Stored as a fraction (1.0 == perfect scaling); converted to percent for plotting.
    df["efficiency"] = df["TOTAL_IMAGES_PER_SEC"] / (ideal_speedup * reference_throughput)

    # Left axis: measured and ideal speedup.
    fig, ax1 = plt.subplots()
    # The format string carries only marker and line style; the colour comes from
    # the keyword so the two never disagree (a colour in both triggers a warning).
    ax1.plot(df["GPUs"], df["Speedup"], 'o--', label='speedup', color='blue')
    ax1.set_xlabel('GPUs')
    ax1.set_ylabel('Speedup', color='blue')
    ax1.set_xticks(df["GPUs"])
    ax1.plot(df["GPUs"], ideal_speedup, 'o-', label='ideal speedup', color='orange')
    ax1.tick_params(axis='y', labelcolor='blue')

    # Right axis: efficiency in percent, each point annotated with its value.
    text_spacing = 0.1  # vertical offset of the annotation, in percentage points
    efficiency_pct = df["efficiency"] * 100
    ax2 = ax1.twinx()
    ax2.set_ylabel('Efficiency (%)', color='red')
    ax2.plot(df["GPUs"], efficiency_pct, c='red', marker="o", label='efficiency')
    ax2.tick_params(axis='y', labelcolor='red')
    for nb_gpus, pct in zip(df["GPUs"], efficiency_pct):
        ax2.text(nb_gpus, pct + text_spacing, f"{pct:.2f}%", size=11, c='red')

def plot_images_per_sec(df):
    """
    Render the raw throughput (images per second) of every run as a bar chart.

    One bar is drawn per row of ``df``, ordered by the total GPU count of
    the run (``NODES * GPUS_PER_NODE``). The caller's frame is not modified.
    """
    total_gpus = df["NODES"] * df["GPUS_PER_NODE"]
    runs_by_gpu_count = df.assign(GPUs=total_gpus).sort_values(by="GPUs")
    runs_by_gpu_count.plot(
        kind='bar',
        x="GPUs",
        y="TOTAL_IMAGES_PER_SEC",
        color='pink',
        edgecolor='red',
    )

def plot_images_per_sec_per_batch_size(df):
    """
    Render images per sec as a bar chart, one bar per run configuration.

    Runs are ordered by total GPU count (``NODES * GPUS_PER_NODE``) and each
    bar is labelled ``(LOCAL_BATCH_SIZE)x(NODES)x(GPUS_PER_NODE)``.
    The caller's frame is not modified.
    """
    df = df.copy()
    df["GPUs"] = df["NODES"] * df["GPUS_PER_NODE"]
    df = df.sort_values(by="GPUs")
    df.plot(x="LOCAL_BATCH_SIZE", y="TOTAL_IMAGES_PER_SEC", color='pink', edgecolor='red', kind='bar')
    # Labels are built column-wise: iterrows() would coerce every row to a
    # common dtype and is needlessly slow for reading three columns.
    ticks = [
        f"({int(batch_size)})x({int(nodes)})x({int(gpus_per_node)})"
        for batch_size, nodes, gpus_per_node in zip(
            df["LOCAL_BATCH_SIZE"], df["NODES"], df["GPUS_PER_NODE"]
        )
    ]
    # Bars sit at positions 0..len(df)-1; replace the default labels there.
    plt.xticks(range(len(df)), ticks)
    plt.xlabel("B_ref x Nodes x GPUs per node")


if __name__ == "__main__":
    # Command-line entry point: read a CSV of benchmark runs, render the
    # requested plot and save it as a PDF.

    # Maps every accepted --plot_type value to the function that renders it.
    plotters = {
        'scaling_and_efficiency': plot_scaling_and_efficiency,
        'images_per_sec': plot_images_per_sec,
        'images_per_sec_per_batch_size': plot_images_per_sec_per_batch_size,
    }

    # Help is left enabled so that -h/--help documents the options.
    parser = argparse.ArgumentParser('plot', description='Plot benchmark results stored in a CSV file.')
    parser.add_argument('--file', default='file.csv', type=str,
                        help='CSV file with one row per benchmark run')
    parser.add_argument('--plot_type', default='scaling_and_efficiency', type=str, choices=list(plotters),
                        help='which plot to render')
    parser.add_argument('--local_batch_size', default=None, type=int,
                        help='keep only runs with this LOCAL_BATCH_SIZE (scaling_and_efficiency only)')
    parser.add_argument('--target', default="", type=str,
                        help='output path; defaults to a name derived from --file and --plot_type')

    args = parser.parse_args()
    target = args.target
    df = pd.read_csv(args.file)

    # Suffix appended to the default output name; only the scaling plot uses it.
    extra = ""
    if args.plot_type == 'scaling_and_efficiency' and args.local_batch_size is not None:
        df = df[df.LOCAL_BATCH_SIZE == args.local_batch_size]
        if df.empty:
            # Fail here with a usable message instead of deep inside the plot code.
            parser.error(f"no runs with LOCAL_BATCH_SIZE == {args.local_batch_size} in {args.file}")
        extra = f"bs{args.local_batch_size}"

    plotters[args.plot_type](df)
    # Default name: <file>_<plot_type><extra>.pdf
    plt.savefig(target if target else args.file + f"_{args.plot_type}{extra}.pdf")
	import matplotlib as mpl
	mpl.use('Agg')
	import argparse
	import pandas as pd
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np

	def plot_scaling_and_efficiency(df):
		"""
		Render scaling and efficiency plots

		- Scaling plot is the relationship between the number of GPUs used
		  and the speedup in the throughput
		- Efficiency is the ratio of throughput to ideal reference throughput

		The run with the fewest GPUs is the reference: K_ref is its GPU count
		and T(K_ref) its throughput. For a run using K GPUs:

		- speedup(K)       = T(K) / T(K_ref)
		- ideal speedup(K) = K / K_ref
		- efficiency(K)    = T(K) / ((K / K_ref) * T(K_ref)), drawn as a percentage

		Args:
			df: DataFrame with one row per run and the columns ``NODES``,
				``GPUS_PER_NODE`` and ``TOTAL_IMAGES_PER_SEC``. A copy is taken,
				so the caller's frame is not modified.

		Raises:
			ValueError: if ``df`` has no rows (there is no reference run).
		"""
		if len(df) == 0:
			raise ValueError(
				"cannot plot scaling and efficiency: no runs available to use as reference"
			)

		df = df.copy()
		# Number of GPUs per run
		df["GPUs"] = df["NODES"] * df["GPUS_PER_NODE"]
		df = df.sort_values(by="GPUs")

		# After sorting, the first row is the run with the minimal number of GPUs;
		# it provides both the reference GPU count and the reference throughput.
		reference_run = df.iloc[0]
		reference_nb_gpus = reference_run["GPUs"]
		reference_throughput = reference_run["TOTAL_IMAGES_PER_SEC"]

		ideal_speedup = df["GPUs"] / reference_nb_gpus
		df["Speedup"] = df["TOTAL_IMAGES_PER_SEC"] / reference_throughput
		# Stored as a fraction (1.0 == perfect scaling); converted to percent for plotting.
		df["efficiency"] = df["TOTAL_IMAGES_PER_SEC"] / (ideal_speedup * reference_throughput)

		# Left axis: measured and ideal speedup.
		fig, ax1 = plt.subplots()
		# The format string carries only marker and line style; the colour comes from
		# the keyword so the two never disagree (a colour in both triggers a warning).
		ax1.plot(df["GPUs"], df["Speedup"], 'o--', label='speedup', color='blue')
		ax1.set_xlabel('GPUs')
		ax1.set_ylabel('Speedup', color='blue')
		ax1.set_xticks(df["GPUs"])
		ax1.plot(df["GPUs"], ideal_speedup, 'o-', label='ideal speedup', color='orange')
		ax1.tick_params(axis='y', labelcolor='blue')

		# Right axis: efficiency in percent, each point annotated with its value.
		text_spacing = 0.1  # vertical offset of the annotation, in percentage points
		efficiency_pct = df["efficiency"] * 100
		ax2 = ax1.twinx()
		ax2.set_ylabel('Efficiency (%)', color='red')
		ax2.plot(df["GPUs"], efficiency_pct, c='red', marker="o", label='efficiency')
		ax2.tick_params(axis='y', labelcolor='red')
		for nb_gpus, pct in zip(df["GPUs"], efficiency_pct):
			ax2.text(nb_gpus, pct + text_spacing, f"{pct:.2f}%", size=11, c='red')

	def plot_images_per_sec(df):
		"""
		Render the raw throughput (images per second) of every run as a bar chart.

		One bar is drawn per row of ``df``, ordered by the total GPU count of
		the run (``NODES * GPUS_PER_NODE``). The caller's frame is not modified.
		"""
		df = df.copy()
		df["GPUs"] = df["NODES"] * df["GPUS_PER_NODE"]
		df = df.sort_values(by="GPUs")
		df.plot(x="GPUs", y="TOTAL_IMAGES_PER_SEC", color='pink', edgecolor='red', kind='bar')

	def plot_images_per_sec_per_batch_size(df):
		"""
		Render images per sec as a bar chart, one bar per run configuration.

		Runs are ordered by total GPU count (``NODES * GPUS_PER_NODE``) and each
		bar is labelled ``(LOCAL_BATCH_SIZE)x(NODES)x(GPUS_PER_NODE)``.
		The caller's frame is not modified.
		"""
		df = df.copy()
		df["GPUs"] = df["NODES"] * df["GPUS_PER_NODE"]
		df = df.sort_values(by="GPUs")
		df.plot(x="LOCAL_BATCH_SIZE", y="TOTAL_IMAGES_PER_SEC", color='pink', edgecolor='red', kind='bar')
		# Labels are built column-wise: iterrows() would coerce every row to a
		# common dtype and is needlessly slow for reading three columns.
		ticks = [
			f"({int(batch_size)})x({int(nodes)})x({int(gpus_per_node)})"
			for batch_size, nodes, gpus_per_node in zip(
				df["LOCAL_BATCH_SIZE"], df["NODES"], df["GPUS_PER_NODE"]
			)
		]
		# Bars sit at positions 0..len(df)-1; replace the default labels there.
		plt.xticks(range(len(df)), ticks)
		plt.xlabel("B_ref x Nodes x GPUs per node")


	if __name__ == "__main__":
		# Command-line entry point: read a CSV of benchmark runs, render the
		# requested plot and save it as a PDF.

		# Maps every accepted --plot_type value to the function that renders it.
		plotters = {
			'scaling_and_efficiency': plot_scaling_and_efficiency,
			'images_per_sec': plot_images_per_sec,
			'images_per_sec_per_batch_size': plot_images_per_sec_per_batch_size,
		}

		# Help is left enabled so that -h/--help documents the options.
		parser = argparse.ArgumentParser('plot', description='Plot benchmark results stored in a CSV file.')
		parser.add_argument('--file', default='file.csv', type=str,
							help='CSV file with one row per benchmark run')
		parser.add_argument('--plot_type', default='scaling_and_efficiency', type=str, choices=list(plotters),
							help='which plot to render')
		parser.add_argument('--local_batch_size', default=None, type=int,
							help='keep only runs with this LOCAL_BATCH_SIZE (scaling_and_efficiency only)')
		parser.add_argument('--target', default="", type=str,
							help='output path; defaults to a name derived from --file and --plot_type')

		args = parser.parse_args()
		target = args.target
		df = pd.read_csv(args.file)

		# Suffix appended to the default output name; only the scaling plot uses it.
		extra = ""
		if args.plot_type == 'scaling_and_efficiency' and args.local_batch_size is not None:
			df = df[df.LOCAL_BATCH_SIZE == args.local_batch_size]
			if df.empty:
				# Fail here with a usable message instead of deep inside the plot code.
				parser.error(f"no runs with LOCAL_BATCH_SIZE == {args.local_batch_size} in {args.file}")
			extra = f"bs{args.local_batch_size}"

		plotters[args.plot_type](df)
		# Default name: <file>_<plot_type><extra>.pdf
		plt.savefig(target if target else args.file + f"_{args.plot_type}{extra}.pdf")