Skip to content

Instantly share code, notes, and snippets.

@jjerphan
Last active September 18, 2022 08:56
Show Gist options
  • Save jjerphan/17d38a21a85931b448886087b11d2d19 to your computer and use it in GitHub Desktop.
Save jjerphan/17d38a21a85931b448886087b11d2d19 to your computer and use it in GitHub Desktop.
Experiments with and without mimalloc -- scikit-learn#22320

mimalloc can be installed from conda-forge:

export ENV_NAME=your_env_name
conda install -n $ENV_NAME -c conda-forge mimalloc
conda activate $ENV_NAME

Then the benchmark script (plots.py) can be launched with mimalloc using LD_PRELOAD:

LD_PRELOAD=$CONDA_PREFIX/lib/libmimalloc.so python plots.py | tee scalability_mimalloc.txt
import numpy as np
import pandas as pd
import seaborn as sns
import threadpoolctl
import subprocess
from io import StringIO
from time import perf_counter
from matplotlib import pyplot as plt
from sklearn.neighbors import NearestNeighbors
from joblib import Memory
# On-disk joblib cache (stored under ".tmp") used to memoize benchmark runs.
memory = Memory(".tmp", verbose=0)

# Abbreviated hash of the currently checked-out commit; it is embedded in the
# figure title so that a plot can be traced back to the code that produced it.
commit = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
commit = commit.decode("ascii").strip()
@memory.cache
def execute_bench(
    n_train=100,
    n_test=100,
    n_features_list=None,
    n_threads_list=None,
):
    """Time ``NearestNeighbors.radius_neighbors`` for several thread counts.

    For every number of features, a random train set and a random test set
    are drawn from a fixed-seed generator; the brute-force radius query is
    then timed once for every number of threads. The call is memoized
    through ``memory.cache``.

    Parameters
    ----------
    n_train : int, default=100
        Number of rows of the train set.
    n_test : int, default=100
        Number of rows of the test (query) set.
    n_features_list : list of int, default=None
        Numbers of features to benchmark. A falsy value selects
        ``[50, 100, 500]``.
    n_threads_list : list of int, default=None
        Numbers of threads to benchmark. A falsy value selects
        ``[1, 2, 4, 8, 16, 32, 64, 128]``.

    Returns
    -------
    pandas.DataFrame
        One row per (n_features, n_threads) pair with the columns
        ``n_threads``, ``n_train``, ``n_test``, ``n_features``,
        ``mean_runtime`` and ``stderr_runtime``. Each configuration is run
        only once, so ``mean_runtime`` is that single wall-clock measurement
        (in seconds) and ``stderr_runtime`` is always 0.
    """
    if not n_features_list:
        n_features_list = [50, 100, 500]
    if not n_threads_list:
        n_threads_list = [1, 2, 4, 8, 16, 32, 64, 128]

    rng = np.random.RandomState(0)
    controller = threadpoolctl.ThreadpoolController()
    records = []

    for n_features in n_features_list:
        # The same data is reused for every thread count of this feature count.
        X_train = rng.rand(n_train, n_features)
        X_test = rng.rand(n_test, n_features)

        for n_threads in n_threads_list:
            # Cap every thread pool known to threadpoolctl at n_threads.
            with controller.limit(limits=n_threads, user_api=None):
                estimator = NearestNeighbors(
                    radius=np.log(n_features),
                    algorithm="brute",
                    n_jobs=n_threads,
                )
                estimator.fit(X_train)

                # Only the query is timed, not the fit.
                tic = perf_counter()
                estimator.radius_neighbors(X_test, return_distance=True)
                toc = perf_counter()

            records.append(
                [n_threads, n_train, n_test, n_features, toc - tic, 0]
            )

    return pd.DataFrame(
        records,
        columns=[
            "n_threads",
            "n_train",
            "n_test",
            "n_features",
            "mean_runtime",
            "stderr_runtime",
        ],
    )
def plot_results(df, save=False, n_train=None, n_test=None):
    """Plot the speed-up of ``radius_neighbors`` against the thread count.

    One curve is drawn, on log-log axes, for every distinct ``n_features``
    value present in ``df``, next to a dashed "linear" reference line
    (y = x). The speed-up of a configuration is the single-thread runtime
    for the same number of features divided by the configuration's runtime.

    Parameters
    ----------
    df : pandas.DataFrame
        Benchmark results holding at least the columns ``n_threads``,
        ``n_features`` and ``mean_runtime``.
    save : bool, default=False
        If True, the figure is written to
        ``speed_up_{n_train}_{n_test}_log.png``; otherwise it is displayed
        with ``plt.show()``.
    n_train, n_test : int, default=None
        Only used to label the figure title and the output file name.

    Raises
    ------
    ValueError
        If a feature count has no ``n_threads == 1`` row to serve as the
        speed-up baseline.
    """
    # Colors of the three feature counts this script was first written for;
    # any other feature count falls back to matplotlib's color cycle.
    known_colors = {50: "yellow", 100: "blue", 500: "red"}

    fig, ax = plt.subplots(figsize=(30, 15))

    # Each thread count appears once per feature count in df: de-duplicate
    # (keeping the order of appearance) before using them as ticks.
    threads = df["n_threads"].unique()

    ax.loglog(
        threads,
        threads,
        linestyle="--",
        color="black",
        label="linear",
        alpha=.5,
    )

    for n_features in df["n_features"].unique():
        df_p = df[df["n_features"] == n_features]

        baseline = df_p.loc[df_p["n_threads"] == 1, "mean_runtime"]
        if baseline.empty:
            raise ValueError(
                f"No single-thread measurement for n_features={n_features}: "
                "cannot compute a speed-up."
            )
        # Take the scalar explicitly: calling float() on a one-element
        # Series is deprecated in recent pandas versions.
        speed_up = float(baseline.iloc[0]) / df_p["mean_runtime"]

        line_style = {"label": f"{n_features} features", "alpha": .5}
        if n_features in known_colors:
            line_style["color"] = known_colors[n_features]
        ax.loglog(df_p["n_threads"], speed_up, **line_style)

    ax.set(
        xlabel="Number of threads",
        ylabel="Speed-up",
        xticks=threads,
        xticklabels=threads,
        yticks=threads,
        yticklabels=[f"×{i}" for i in threads],
    )
    ax.legend()

    title = (
        f"Scalability of sklearn.NearestNeighbors.radius_neighbors "
        f" - (method, n_train, n_test) = ('brute', {n_train}, {n_test})"
        f" - commit: {commit}"
    )
    ax.set_title(title)

    if save:
        fig.savefig(f'speed_up_{n_train}_{n_test}_log.png')
    else:
        plt.show()
if __name__ == "__main__":
    # Problem size of the benchmark; also echoed in the figure title and in
    # the name of the saved image.
    n_train = 100_000
    n_test = 100_000

    df = execute_bench(n_train, n_test)
    # Print the raw timings in addition to saving the figure.
    print(df)
    plot_results(df, save=True, n_train=n_train, n_test=n_test)
@jjerphan
Copy link
Author

jjerphan commented Feb 22, 2022

With mimalloc:

speed_up_100000_100000_log_with_mimalloc

Raw results
    n_threads  n_train  n_test  n_features  mean_runtime  stderr_runtime
0           1    10000   10000          50      1.843863               0
1           2    10000   10000          50      0.878143               0
2           4    10000   10000          50      0.477720               0
3           8    10000   10000          50      0.248385               0
4          16    10000   10000          50      0.202744               0
5          32    10000   10000          50      0.155233               0
6          64    10000   10000          50      0.123474               0
7         128    10000   10000          50      0.243421               0
8           1    10000   10000         100      2.043319               0
9           2    10000   10000         100      1.030469               0
10          4    10000   10000         100      0.528292               0
11          8    10000   10000         100      0.308843               0
12         16    10000   10000         100      0.219315               0
13         32    10000   10000         100      0.165821               0
14         64    10000   10000         100      0.144672               0
15        128    10000   10000         100      0.268027               0
16          1    10000   10000         500      3.942212               0
17          2    10000   10000         500      2.009313               0
18          4    10000   10000         500      1.010291               0
19          8    10000   10000         500      0.537854               0
20         16    10000   10000         500      0.387427               0
21         32    10000   10000         500      0.314703               0
22         64    10000   10000         500      0.254417               0
23        128    10000   10000         500      0.378934               0

Without mimalloc:

speed_up_100000_100000_log_without_mimalloc

Raw results
    n_threads  n_train  n_test  n_features  mean_runtime  stderr_runtime
0           1    10000   10000          50      1.858132               0
1           2    10000   10000          50      0.943718               0
2           4    10000   10000          50      0.595347               0
3           8    10000   10000          50      0.350731               0
4          16    10000   10000          50      0.522424               0
5          32    10000   10000          50      0.444270               0
6          64    10000   10000          50      0.559822               0
7         128    10000   10000          50      0.396564               0
8           1    10000   10000         100      2.063410               0
9           2    10000   10000         100      1.191579               0
10          4    10000   10000         100      0.623694               0
11          8    10000   10000         100      0.359428               0
12         16    10000   10000         100      0.353618               0
13         32    10000   10000         100      0.283564               0
14         64    10000   10000         100      0.616601               0
15        128    10000   10000         100      0.439544               0
16          1    10000   10000         500      4.059608               0
17          2    10000   10000         500      2.084081               0
18          4    10000   10000         500      1.088404               0
19          8    10000   10000         500      0.607854               0
20         16    10000   10000         500      0.585447               0
21         32    10000   10000         500      0.539778               0
22         64    10000   10000         500      0.728474               0
23        128    10000   10000         500      0.497490               0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment