Skip to content

Instantly share code, notes, and snippets.

@charmoniumQ
Last active July 20, 2018 21:32
Show Gist options
  • Save charmoniumQ/618d3cd18f8bdcfc6cdc910faf362849 to your computer and use it in GitHub Desktop.
Save charmoniumQ/618d3cd18f8bdcfc6cdc910faf362849 to your computer and use it in GitHub Desktop.
Malloc performance test
#!/usr/bin/env python3.6
import numpy as np
import re
data = {}
run_envs = ['linux', 'nautk']
series = ['mem', 'chunks', 'time']
files = {'linux': 'data/linux_run_mem2.out', 'nautk': 'data/nautk_run_mem2.out'}
data_pattern = re.compile(r'(\d*),(\d*),(\d*)')
data = {}
for run_env in run_envs:
data[run_env] = {}
data[run_env]['mem'] = []
data[run_env]['chunks'] = []
data[run_env]['time'] = []
with open(files[run_env]) as fil:
for line in fil:
m = data_pattern.match(line)
if m:
mem, chunks, time = map(int, m.groups())
data[run_env]['mem'].append(mem)
data[run_env]['chunks'].append(chunks)
data[run_env]['time'].append(time)
data[run_env]['xs'] = np.vstack((data[run_env]['chunks'],
data[run_env]['mem' ])).T
data[run_env]['time'] = np.array(data[run_env]['time'])
def aggregate(xs, ys, fn):
unique_x = sorted(list(frozenset(map(tuple, xs))))
result = []
for x in unique_x:
matching = (xs == x).all(axis=1)
result.append(fn(ys[matching]))
unique_x = np.array(unique_x)
return unique_x, np.array(result)
from scipy.interpolate import interp2d
xy_grid, linux_ts = aggregate(data['linux']['xs'], data['linux']['time'], np.mean)
linux_ts = np.log2(linux_ts)
xy_grid = np.log2(xy_grid)
chunks = xy_grid[:, 0]
mem = xy_grid[:, 1]
linux_f = interp2d(chunks, mem, linux_ts, kind='linear')
xy_grid_, nautk_ts = aggregate(data['nautk']['xs'], data['nautk']['time'], np.mean)
nautk_ts = np.log2(nautk_ts)
nautk_f = interp2d(chunks, mem, nautk_ts, kind='linear')
v_chunks = np.linspace(np.min(chunks), np.max(chunks), 100)
v_mem = np.linspace(np.min(mem ), np.max(mem ), 100)
tmax = max(linux_ts.max(), nautk_ts.max())
tmin = min(linux_ts.min(), nautk_ts.min())
from bokeh.models import ColumnDataSource, Legend, ColorBar, LinearColorMapper, Plot
from bokeh.plotting import figure, curdoc
from bokeh.layouts import widgetbox, column, row, gridplot
from bokeh.models.widgets import Slider, RadioButtonGroup, Div, Paragraph, PreText
from bokeh.document import Document
width = 350
height = 220
chunks_fig = figure(plot_width=width, plot_height=height, x_range=(v_chunks.min(), v_chunks.max()), y_range=(tmin, tmax))
chunks_linux_line = chunks_fig.line(x=[], y=[], color='red' , legend='linux')
chunks_nautk_line = chunks_fig.line(x=[], y=[], color='blue', legend='nautk')
chunks_line = chunks_fig.line(x=[], y=[], color='gray', alpha=0.5)
chunks_fig.xaxis.axis_label = 'log num_chunks'
chunks_fig.yaxis.axis_label = 'log time'
chunks_fig.legend.location = 'top_left'
mem_fig = figure(plot_width=width, plot_height=height, y_range=(v_mem.min(), v_mem.max()), x_range=(tmin, tmax))
mem_linux_line = mem_fig.line(x=[], y=[], color='red' )
mem_nautk_line = mem_fig.line(x=[], y=[], color='blue')
mem_line = mem_fig.line(x=[], y=[], color='gray', alpha=0.5)
mem_fig.yaxis.axis_label = 'log memory'
mem_fig.xaxis.axis_label = 'log time'
center_fig = figure(plot_width=width, plot_height=height, x_range=(v_chunks.min(), v_chunks.max()), y_range=(v_mem.min(), v_mem.max()))
color_mapper = LinearColorMapper(palette='Viridis256', low=tmin, high=tmax)
image = center_fig.image([],
x=v_chunks.min(),
y=v_mem.min(),
dw=v_chunks.max() - v_chunks.min(),
dh=v_mem.max() - v_mem.min(),
color_mapper=color_mapper,
)
center_fig.xaxis.axis_label = 'log num_chunks'
center_fig.yaxis.axis_label = 'log mem'
mem_line2 = center_fig.line(x=[], y=[], color='gray')
chunks_line2 = center_fig.line(x=[], y=[], color='gray')
center_fig.circle(
x=xy_grid[:, 0],
y=xy_grid[:, 1],
color='red',
size=1
)
color_bar = ColorBar(color_mapper=color_mapper)
color_bar.orientation = 'horizontal'
color_bar.width = width
cb_height = 80
color_bar.width = int(0.75 * width)
color_bar.height = 20
color_bar.title = 'log time'
import bokeh.palettes
def change_image(_, __, button_i):
if buttons[button_i] == 'linux':
data = linux_f(v_chunks, v_mem)
color_mapper.palette = bokeh.palettes.Viridis[256]
color_mapper.low = tmin
color_mapper.high = tmax
image.data_source.data = {
'image': [data],
}
elif buttons[button_i] == 'nautk':
data = nautk_f(v_chunks, v_mem)
color_mapper.palette = bokeh.palettes.Viridis[256]
color_mapper.low = tmin
color_mapper.high = tmax
image.data_source.data = {
'image': [data],
}
elif buttons[button_i] == 'linux / nautk':
data = linux_f(v_chunks, v_mem) - nautk_f(v_chunks, v_mem)
color_mapper.palette = bokeh.palettes.RdBu[11]
color_mapper.low = min( data.min(), -data.max())
color_mapper.high = max(-data.min(), data.max())
image.data_source.data = {
'image': [data],
}
def change_mem(_, __, c_mem):
if c_mem:
chunks_linux_line.data_source.data = {
'x': v_chunks,
'y': linux_f(v_chunks, c_mem),
}
chunks_nautk_line.data_source.data = {
'x': v_chunks,
'y': nautk_f(v_chunks, c_mem),
}
mem_line.data_source.data = {
'x': [0, tmax],
'y': [c_mem, c_mem],
}
mem_line2.data_source.data = {
'x': [v_chunks.min(), v_chunks.max()],
'y': [c_mem, c_mem],
}
def change_chunks(_, __, c_chunks):
if c_chunks:
mem_linux_line.data_source.data = {
'x': linux_f(c_chunks, v_mem),
'y': v_mem,
}
mem_nautk_line.data_source.data = {
'x': nautk_f(c_chunks, v_mem),
'y': v_mem,
}
chunks_line.data_source.data = {
'x': [c_chunks, c_chunks],
'y': [0, tmax],
}
chunks_line2.data_source.data = {
'x': [c_chunks, c_chunks],
'y': [v_mem.min(), v_mem.max()],
}
chunks_slider = Slider(start=v_chunks.min(), end=v_chunks.max(), value=0, step=1)
chunks_slider.on_change('value', change_chunks)
chunks_slider.value = v_chunks.mean()
top_cell = column(chunks_fig, widgetbox(chunks_slider))
mem_slider = Slider(start=v_mem.min(), end=v_mem.max(), value=0, step=1)
mem_slider.on_change('value', change_mem)
mem_slider.value = v_mem.mean()
mem_slider.orientation = 'vertical'
mem_slider.direction = 'rtl'
mem_slider.height = int(height * 0.6)
right_cell = row(widgetbox(mem_slider, width=90, height=mem_slider.height), mem_fig)
buttons = ["linux", "nautk", "linux / nautk"]
center_buttons = RadioButtonGroup(
labels=buttons,
)
center_buttons.on_change('active', change_image)
center_buttons.active = 0
control_fig = figure(plot_width=width, height=cb_height)
# color_bar.margin = 0
# color_bar.padding = 0
# control_fig.min_border = 0
color_bar.border_line_alpha = 0
color_bar.location = (0,0)
control_fig.add_layout(color_bar)
control_cell = column(control_fig, center_buttons)
description = Div(text="""
<h2>How to read this graph</h2>
<p>This shows the <i>time</i> (in CPU cycles) to allocate <i>num_chunks</i> equal chunks whose combined size is <i>memory</i> (in bytes). All axes are in log2 space, so a value of 23 means 8Mi.</p>
<p><b>The center</b> shows both parameter sampled simultaneously on a 2d grid. The red dots is where the actual samples are. Use the radio-buttons to select which value is plotted on that chart. <i>linux / nautk</i> shows the difference in log-space, i.e. <code>log2(linux_time) - log2(nautk_time)</code>.</p>
<p><b>The side-plots</b> show a 1d slice of the 2d plot, where one parameter is fixed and the other moves. Move the sliders with your mouse or the left/right cursor keys to changed the fixed-value. The right side-plot is 'sideways' with <i>memory</i> on the vertical axis and <i>time</i> on the horizontal one.</p>
<h2>The experiment</h2>
<ul>
<li>I use <code>rdtsc</code> to count cycles.</li>
<li>I use <code>-O3</code> and <code>GCC 6.3.1</code> to compile both versions.</li>
<li>I average over 10 repetitions.</li>
</ul>
<pre><code>
uint64_t start = 0; rdtsc(start);
for(size_t i = 0; i < num_chunks; ++i) {
try_nnull(allocs[i] = malloc(chunk_size));
}
for(size_t i = 0; i < num_chunks; ++i) {
free(allocs[i]);
}
uint64_t stop = 0; rdtsc(stop);
</code></pre>
<h2><a href="https://gist.github.com/charmoniumQ/618d3cd18f8bdcfc6cdc910faf362849">full code</a></h2>
""")
#layout = gridplot([description], [top_cell, None], [center_fig, right_cell], [control_cell, None], sizing_mode='scale_width', plot_width=int(width * 2.2))
layout = gridplot([top_cell, None], [center_fig, right_cell], [control_cell, None])
curdoc().add_root(gridplot([layout], [description], sizing_mode='scale_width'))
from pathlib import Path
title = Path(__file__).name[:-3].replace('_',' ').capitalize()
curdoc().title = title
#ifndef NDEBUG
#warning Compiling with diagnostics
#define try(call) \
int ret = (call); \
if(!ret) { \
printf("%s:%d: %s returned %d\n", \
__FILE__, __LINE__, #call, ret); \
return -1; \
}
#define try_nnull(call) try((call) != NULL)
#define try_zero(call) try(!(call))
#else
#define try(call) (call)
#define try_nnull(call) (call)
#define try_zero(call) (call)
#endif
#ifndef MAIN_H
#define MAIN_H
#ifdef __NAUTILUS__
void app_main();
#endif
#endif
#define rdtsc(val) \
{ \
uint32_t hi, lo; \
__asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); \
*(uint64_t*) &val = (((uint64_t) hi) << 32) | lo; \
};
#ifdef __NAUTILUS__
#include <nautilus/libccompat.h>
#else
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#endif
#include "app/main.h"
#include "app/error.h"
#include "app/rdtsc.h"
int malloc_time_test(size_t chunk_size, size_t num_chunks, uint64_t* result) {
void** allocs = malloc(num_chunks * sizeof(void*));
uint64_t start = 0; rdtsc(start);
for(size_t i = 0; i < num_chunks; ++i) {
try_nnull(allocs[i] = malloc(chunk_size));
}
for(size_t i = 0; i < num_chunks; ++i) {
free(allocs[i]);
}
uint64_t stop = 0; rdtsc(stop);
free(allocs);
*result = (uint64_t) (stop - start);
return 0;
}
int mem_test_main() {
const size_t kilo = 1024 * 1 ;
const size_t mega = 1024 * kilo;
const size_t giga = 1024 * mega;
const size_t mem_step = 2;
const size_t mem_min = 1 * mega;
const size_t mem_max = 8 * giga;
const size_t num_chunks_step = 2;
const size_t num_chunks_min = 1;
const size_t num_chunks_max = 128 * mega;
const size_t num_reps = 10;
printf("total mem (bytes),chunks,time (cycles)\n");
for(size_t rep = 0; rep < num_reps; ++rep) {
for(size_t mem = mem_min; mem <= mem_max; mem *= mem_step) {
for(size_t num_chunks = num_chunks_min;
num_chunks <= num_chunks_max && num_chunks <= mem / 4;
num_chunks *= num_chunks_step) {
size_t chunk_size = mem / num_chunks;
uint64_t time = 0;
try_zero(malloc_time_test(chunk_size, num_chunks, &time));
printf("%ld,%lu,%lu\n", chunk_size * num_chunks, num_chunks, time);
fflush(stdout);
}
}
}
return 0;
}
#ifdef __NAUTILUS__
void app_main() {
mem_test_main();
}
#else
int main() {
try_zero(mem_test_main());
return 0;
}
#endif
#CC = clang
CFLAGS += -Wall -Wextra -I./include/ -std=gnu99
CFLAGS_DBG = -Og -g
CFLAGS_OPT = -O3 -DNDEBUG
main: main.out
cp main.out main
main.out: main.c
$(CC) $(CFLAGS) $(CFLAGS_OPT) -o $@ $<
main.out_dbg: main.c
$(CC) $(CFLAGS) $(CFLAGS_DBG) -o $@ $<
run: main.out
./main.out
.PHONY: out
debug: main.out_dbg
gdb -quiet ./main.out_dbg -ex r
.PHONY: debug
memtest: main.out_dbg
valgrind ./main.out_dbg
.PHONY: memtest
clean:
$(RM) main.out main.out_dbg
CFLAGS += -Wall -Wextra -O3 -DNDEBUG
obj-y += main.o
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment