# anarazel/plot2.py

## plot2.py
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

def quantplot(df, percentile_limit=0.001, n=10, semilog = False, interval='1s'):
    """Plot how the distribution of ``elapsed_us`` evolves over time.

    The rows of *df* are grouped into buckets of width *interval* based on
    the ``time_us`` column. For every bucket ``2 * n + 1`` evenly spaced
    quantiles between *percentile_limit* and ``1 - percentile_limit`` are
    computed. Each non-median quantile is drawn as a translucent band
    reaching to the median; because the bands overlap, values closer to the
    median appear darker. The median and the mean are added as lines.

    Args:
        df: DataFrame with a timedelta-typed ``time_us`` column (needed for
            the resampling and the conversion to seconds) and an
            ``elapsed_us`` column holding the values to summarize.
        percentile_limit: Distance of the outermost quantiles from 0 and 1.
        n: Number of bands drawn on each side of the median.
        semilog: If true, use a logarithmic y axis.
        interval: pandas offset string giving the bucket width.

    Returns:
        Tuple ``(fig, ax, ranges)``; *ranges* holds the artists returned by
        ``fill_between``, lower bands first, then upper bands.
    """
    quantiles = np.linspace(0+percentile_limit, 1-percentile_limit, num=n*2+1)
    median_q = quantiles[n]
    # n bands are stacked on each side, so full overlap approaches opaque.
    alpha = 1/n

    # TODO: the column names are still hard-coded here.
    per_bucket = df.resample(interval, on='time_us')['elapsed_us']

    # quantile() generates a multi-index with an index "column" for each
    # computed quantile. Unstack moves those to result set columns.
    df_q = per_bucket.quantile(quantiles).unstack()

    # Bucket start times in seconds. total_seconds() is required here:
    # the .seconds accessor only yields the seconds *component*, which wraps
    # around after one day and drops any sub-second part of the bucket start.
    x = df_q.index.total_seconds().values

    # XXX, add optional smoothing?
    #df_q = df_q.rolling(3).max()

    fig, ax = plt.subplots()

    if semilog:
        ax.semilogy()

    # Plot "area" between 50% quantile and the "lower" quantiles. By
    # overlapping multiple transparent areas the more common quantiles become
    # darker. Separate from "higher" quantiles so a different color can be used.
    ranges = []
    for q in quantiles[:n]:
        ranges.append(ax.fill_between(x, df_q[q], df_q[median_q], alpha=alpha, color='g', edgecolor=None))

    # Same as above, but for "higher" quantiles. The slice runs through the
    # last quantile so the upper side gets n bands, mirroring the lower side.
    for q in quantiles[n+1:]:
        ranges.append(ax.fill_between(x, df_q[median_q], df_q[q], alpha=alpha, color='g', edgecolor=None))

    # Plot median quantile as a line.
    ax.plot(x, df_q[median_q], color='g', label='median quantile')

    # Also add mean as a line
    ax.plot(x, per_bucket.mean(), color='b', label = 'mean')

    ax.set_xlabel('time in s')
    ax.set_ylabel('duration in us')
    fig.legend()

    return fig, ax, ranges

# Script driver: plot one figure per input file named on the command line.
if len(sys.argv) < 2:
    print("pass file(s) as args", file=sys.stderr)
    sys.exit(1)

# Loop-invariant factor used to merge the seconds and microseconds columns.
usec_per_s = 1_000_000

for fname in sys.argv[1:]:
    # Space-separated input without a header row; the column names are
    # assigned here and only the three columns needed below are loaded.
    df = pd.read_csv(fname,
                     sep = ' ',
                     names = ['client', 'tx', 'elapsed_us', 'script_no', 'srctime_s', 'srctime_us'],
                     usecols = ['elapsed_us', 'srctime_s', 'srctime_us'],
                     engine = 'c')

    # Without rows there is no first timestamp to anchor on and nothing to
    # plot; report it instead of failing with a KeyError.
    if df.empty:
        print(f"{fname}: no rows, skipping", file=sys.stderr)
        continue

    # combine time-in-seconds with the microseconds column
    abs_time_us = df['srctime_s'] * usec_per_s + df['srctime_us']

    # make time relative to start; to_timedelta states the unit of the
    # integer values explicitly.
    start = abs_time_us.iloc[0]
    df['time_us'] = pd.to_timedelta(abs_time_us - start, unit='us')

    fig, ax, ranges = quantplot(df)
    fig.set_figwidth(15)
    fig.set_figheight(4)
    fig.show()

# Keep the windows open until the user closes them.
plt.show(block=True)
	import matplotlib
	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd
	import sys

	def quantplot(df, percentile_limit=0.001, n=10, semilog = False, interval='1s'):
		"""Plot how the distribution of ``elapsed_us`` evolves over time.

		The rows of *df* are grouped into buckets of width *interval* based on
		the ``time_us`` column. For every bucket ``2 * n + 1`` evenly spaced
		quantiles between *percentile_limit* and ``1 - percentile_limit`` are
		computed. Each non-median quantile is drawn as a translucent band
		reaching to the median; because the bands overlap, values closer to the
		median appear darker. The median and the mean are added as lines.

		Args:
			df: DataFrame with a timedelta-typed ``time_us`` column (needed for
				the resampling and the conversion to seconds) and an
				``elapsed_us`` column holding the values to summarize.
			percentile_limit: Distance of the outermost quantiles from 0 and 1.
			n: Number of bands drawn on each side of the median.
			semilog: If true, use a logarithmic y axis.
			interval: pandas offset string giving the bucket width.

		Returns:
			Tuple ``(fig, ax, ranges)``; *ranges* holds the artists returned by
			``fill_between``, lower bands first, then upper bands.
		"""
		quantiles = np.linspace(0+percentile_limit, 1-percentile_limit, num=n*2+1)
		median_q = quantiles[n]
		# n bands are stacked on each side, so full overlap approaches opaque.
		alpha = 1/n

		# TODO: the column names are still hard-coded here.
		per_bucket = df.resample(interval, on='time_us')['elapsed_us']

		# quantile() generates a multi-index with an index "column" for each
		# computed quantile. Unstack moves those to result set columns.
		df_q = per_bucket.quantile(quantiles).unstack()

		# Bucket start times in seconds. total_seconds() is required here:
		# the .seconds accessor only yields the seconds *component*, which wraps
		# around after one day and drops any sub-second part of the bucket start.
		x = df_q.index.total_seconds().values

		# XXX, add optional smoothing?
		#df_q = df_q.rolling(3).max()

		fig, ax = plt.subplots()

		if semilog:
			ax.semilogy()

		# Plot "area" between 50% quantile and the "lower" quantiles. By
		# overlapping multiple transparent areas the more common quantiles become
		# darker. Separate from "higher" quantiles so a different color can be used.
		ranges = []
		for q in quantiles[:n]:
			ranges.append(ax.fill_between(x, df_q[q], df_q[median_q], alpha=alpha, color='g', edgecolor=None))

		# Same as above, but for "higher" quantiles. The slice runs through the
		# last quantile so the upper side gets n bands, mirroring the lower side.
		for q in quantiles[n+1:]:
			ranges.append(ax.fill_between(x, df_q[median_q], df_q[q], alpha=alpha, color='g', edgecolor=None))

		# Plot median quantile as a line.
		ax.plot(x, df_q[median_q], color='g', label='median quantile')

		# Also add mean as a line
		ax.plot(x, per_bucket.mean(), color='b', label = 'mean')

		ax.set_xlabel('time in s')
		ax.set_ylabel('duration in us')
		fig.legend()

		return fig, ax, ranges

	# Script driver: plot one figure per input file named on the command line.
	if len(sys.argv) < 2:
		print("pass file(s) as args", file=sys.stderr)
		sys.exit(1)

	# Loop-invariant factor used to merge the seconds and microseconds columns.
	usec_per_s = 1_000_000

	for fname in sys.argv[1:]:
		# Space-separated input without a header row; the column names are
		# assigned here and only the three columns needed below are loaded.
		df = pd.read_csv(
			fname,
			sep = ' ',
			names = ['client', 'tx', 'elapsed_us', 'script_no', 'srctime_s', 'srctime_us'],
			usecols = ['elapsed_us', 'srctime_s', 'srctime_us'],
			engine = 'c')

		# Without rows there is no first timestamp to anchor on and nothing to
		# plot; report it instead of failing with a KeyError.
		if df.empty:
			print(f"{fname}: no rows, skipping", file=sys.stderr)
			continue

		# combine time-in-seconds with the microseconds column
		abs_time_us = df['srctime_s'] * usec_per_s + df['srctime_us']

		# make time relative to start; to_timedelta states the unit of the
		# integer values explicitly.
		start = abs_time_us.iloc[0]
		df['time_us'] = pd.to_timedelta(abs_time_us - start, unit='us')

		fig, ax, ranges = quantplot(df)
		fig.set_figwidth(15)
		fig.set_figheight(4)
		fig.show()

	# Keep the windows open until the user closes them.
	plt.show(block=True)