Skip to content

Instantly share code, notes, and snippets.

@bbengfort
Last active May 2, 2017 20:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bbengfort/bed86721ecb20fd96269606c05741851 to your computer and use it in GitHub Desktop.
Save bbengfort/bed86721ecb20fd96269606c05741851 to your computer and use it in GitHub Desktop.
Benchmarks for Python unique functions
#!/usr/bin/env python3
import csv
import time
import string
import random
import numpy as np
from sklearn.preprocessing import LabelEncoder
def py_unique(data):
    """Return the distinct elements of *data* using a built-in ``set``.

    The elements must be hashable. The result is a new list whose order is
    whatever the set iteration yields, i.e. it is not sorted and not
    guaranteed to follow the input order.
    """
    distinct = set(data)
    return list(distinct)
def np_unique(data):
    """Return the distinct elements of *data* via ``numpy.unique``.

    Accepts anything ``numpy.unique`` accepts (a list or an array here) and
    returns a NumPy array of the unique values in sorted order.
    """
    return np.unique(data)
def sk_unique(data):
    """Return the distinct labels of *data* as learned by a ``LabelEncoder``.

    A fresh scikit-learn ``LabelEncoder`` is fitted on *data* for every call
    and its ``classes_`` attribute (the labels it discovered) is returned.
    """
    # fit() returns the encoder itself, so the attribute can be read directly.
    return LabelEncoder().fit(data).classes_
def make_data(uniques=10, length=10000):
    """Build a random list of single-letter strings for benchmarking.

    Each element is drawn uniformly from the first *uniques* characters of
    ``string.ascii_letters``, so the result contains *at most* ``uniques``
    distinct values (fewer is possible for short lists).

    Args:
        uniques: Size of the alphabet to draw from; must be between 1 and
            ``len(string.ascii_letters)`` inclusive.
        length: Number of elements to generate.

    Returns:
        A list of ``length`` one-character strings.

    Raises:
        ValueError: If *uniques* is smaller than 1 or larger than the number
            of available letters.
    """
    chars = string.ascii_letters
    if uniques > len(chars):
        raise ValueError("too many uniques for the choices")
    # A negative value would silently slice from the end of the alphabet and
    # zero would make random.choice fail on an empty sequence.
    if uniques < 1:
        raise ValueError("uniques must be at least 1")

    pool = chars[:uniques]
    return [random.choice(pool) for _ in range(length)]
def timeit(func):
    """Call *func* once with no arguments and return the elapsed microseconds.

    The return value of *func* is discarded. Timing uses
    ``time.perf_counter`` because it is monotonic and has the highest
    available resolution, unlike the adjustable wall clock ``time.time``.
    """
    start = time.perf_counter()
    func()
    elapsed_seconds = time.perf_counter() - start
    return elapsed_seconds * 1000000.0
def benchmark(func, data, n=10000):
    """Return the mean time, in microseconds, of calling ``func(data)``.

    Args:
        func: Callable taking *data* as its single argument.
        data: The input handed to *func* on every repetition.
        n: Number of repetitions to average over; must be at least 1.

    Returns:
        The arithmetic mean of the ``timeit`` measurements as a float.

    Raises:
        ValueError: If *n* is smaller than 1 (there would be nothing to
            average).
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    # Build the zero-argument wrapper once instead of on every repetition.
    def call():
        return func(data)

    total = sum(timeit(call) for _ in range(n))
    return total / n
if __name__ == '__main__':
    # Benchmark every unique-finding function on both a Python list and a
    # NumPy array, for lengths 10**1 .. 10**6 and a range of alphabet sizes,
    # and write one CSV row per (method, dtype, uniques, length) combination.
    #
    # newline='' is what the csv module expects for files it writes to, and
    # the explicit UTF-8 encoding keeps the non-ASCII "μ" in the header from
    # depending on the platform's default locale encoding.
    with open('results.csv', 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['method', 'dtype', 'uniques', 'length', 'mean μs per operation'])

        methods = (
            ('py_unique', py_unique),
            ('np_unique', np_unique),
            ('sk_unique', sk_unique),
        )

        for exponent in range(1, 7):
            length = 10 ** exponent
            for uniques in (1, 5, 10, 15, 20, 25, 30, 35, 40):
                data = make_data(uniques, length)
                # Same values, two containers: list rows first, then array.
                containers = (('list', data), ('array', np.array(data)))
                for dtype, sample in containers:
                    for name, func in methods:
                        mt = benchmark(func, sample)
                        writer.writerow([name, dtype, uniques, length, mt])
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment