Last active
May 5, 2019 12:02
-
-
Save Veedrac/d25148faf20669589993 to your computer and use it in GitHub Desktop.
Speed test. Warning: Please read first comment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdint>
#include <iostream>
#include <iterator>
#include <numeric>
#include <unordered_set>
#include <vector>
// Builds the sequence a[i] = (i odd ? -T(i) : T(i)) for i in [0, number),
// where T(i) = 0 + 1 + ... + i, and counts the zero-sum triples in it.
//
// Every pair (x, y) taken from the sequence is examined; the pair is counted
// when the value that completes it to zero, -(x + y), is present in the
// sequence and is neither x nor y. Each zero-sum triple is seen once per
// pair it contains, so the pair count is divided by three before returning.
//
// Returns 0 when `number` is zero or negative.
//
// NOTE(review): the running sum and the pair sums are 32-bit; the caller in
// this file only passes values below 500, which stays well within range.
int32_t some_calculations(int32_t number) {
    // A negative count has no terms. Bail out before reserve(), where a
    // negative value would convert to an enormous size and throw.
    if (number <= 0) {
        return 0;
    }

    std::vector<int32_t> a;
    std::unordered_set<int32_t> s;

    // This is the fraction that PyPy uses:
    // http://stackoverflow.com/q/25968487/1763356
    //
    // You can go faster by making this even smaller,
    // but I'm already letting C++ use 32 bit integers
    // and calling reserve on the vector.
    s.max_load_factor(2./3);
    a.reserve(static_cast<std::vector<int32_t>::size_type>(number));

    int32_t running = 0;
    for (int32_t i = 0; i < number; ++i) {
        running += i;
        // Odd positions hold the negated running sum.
        const int32_t item = (i % 2) ? -running : running;
        s.insert(item);
        a.emplace_back(item);
    }

    int32_t tot = 0;
    for (auto lhs = a.cbegin(); lhs != a.cend(); ++lhs) {
        for (auto rhs = std::next(lhs); rhs != a.cend(); ++rhs) {
            // The value that would make lhs + rhs + wanted == 0.
            const int32_t wanted = -(*lhs + *rhs);
            if (wanted != *lhs && wanted != *rhs && s.find(wanted) != s.end()) {
                ++tot;
            }
        }
    }

    return tot / 3;
}
int main(int, char **) { | |
int32_t tot = 0; | |
for (int i=0; i<500; ++i) { | |
tot += some_calculations(i); | |
} | |
std::cout << tot << std::endl; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def some_calculations(number):
    """Count the zero-sum triples among the first `number` signed running sums.

    The sequence is built from the running sum 0, 0+1, 0+1+2, ...; entries at
    odd positions are negated. Every pair of entries (first, second) taken in
    order is examined, and the pair is counted when the value that completes
    it to zero, -(first + second), is present in the sequence and is neither
    of the two. Each zero-sum triple is seen once per pair it contains, so the
    pair count is floor-divided by three.

    Returns 0 when `number` is zero or negative (the range is then empty).
    """
    values = []
    running = 0
    for i in range(number):
        running += i
        # Odd positions hold the negated running sum.
        values.append(-running if i % 2 else running)

    present = set(values)

    tot = 0
    for i, first in enumerate(values):
        for second in values[i + 1:]:
            # The value that would make first + second + wanted == 0.
            wanted = -(first + second)
            if wanted != first and wanted != second and wanted in present:
                tot += 1

    return tot // 3
# Benchmark driver: print the sum of some_calculations(n) for n in 0..499.
print(sum(map(some_calculations, range(500))))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Graph the time to run the input commands.

Usage:
    plot_times.py <n> <command>...
"""
import docopt
import numpy
import resource
import seaborn
import shlex
import subprocess
import sys

from matplotlib import pyplot

# The command-line interface is parsed from this module's docstring.
options = docopt.docopt(__doc__)

try:
    repeats = int(options["<n>"])
except ValueError:
    # Exit with a non-zero status and the message on stderr.
    raise SystemExit("<n> has to be an integer.")

if repeats < 1:
    # With no runs there are no time differences to plot.
    raise SystemExit("<n> has to be at least 1.")

# One list of cumulative child user-CPU-time samples per command.
datas = []

# Time
for raw_command in options["<command>"]:
    command = shlex.split(raw_command)

    # ru_utime of RUSAGE_CHILDREN is cumulative over all children waited for
    # so far, so take a baseline first and sample again after every run.
    data = [resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime]

    for i in range(repeats):
        # "\r" rewrites the same terminal line with the current progress.
        print("\r{}: {} of {}".format(raw_command, i+1, repeats), end="")
        sys.stdout.flush()

        # The command's output is captured and discarded; a non-zero exit
        # status raises CalledProcessError and aborts the script.
        subprocess.check_output(command)
        data.append(resource.getrusage(resource.RUSAGE_CHILDREN).ru_utime)

    print()
    datas.append(data)

# Plot
figure = pyplot.figure(figsize=(24, 6))
seaborn.set(style="white", font_scale=2)

for command, data in zip(options["<command>"], datas):
    # Differences between successive cumulative samples are per-run times.
    times = numpy.diff(data)

    seaborn.distplot(
        times,
        label=command,
        # One bin per distinct time after rounding to two decimals.
        bins=len(set(times.round(2))),
        norm_hist=True,
        # Keep the density estimate within the observed range.
        kde_kws={"clip": (min(times), max(times))},
        hist_kws={"histtype": "stepfilled", "alpha": 0.2}
    )

seaborn.despine()
pyplot.title("Time to run")
pyplot.legend()
figure.savefig("time_to_run.png")
I suspect this is due to python storing smaller numbers as singletons. Thoughts?
@jkholodnov That wouldn't be it: a) PyPy doesn't have that optimization, and b) the reason CPython has that optimization is that `int`
objects are Python objects which are allocated and stored on the heap. In C++, `int`
and friends are just words on the stack, so there is no heap allocation and therefore no need to optimize allocations away.
I know that this gist is old, but why didn't you use the optimized C++ compilation?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Edit:
Like all benchmarks, this one is flawed. PyPy does resize at a load factor of 2/3, yes, but it resizes by a factor of 4, not a factor of 2 as C++ does. Accounting for this
makes C++ come out a few percentage points faster than PyPy. Sadly, one can no longer claim PyPy to be faster, although getting C++ to a competitive point took a lot more effort.
I wouldn't just dismiss parity with heavily-optimized C++, though!
Versions, on reminder from wyldphyre:
`perf stat` output, on request from Imxset21:
msiemens wanted more data than just a single time, so I plotted it: