Skip to content

Instantly share code, notes, and snippets.

@mjhea0
Created August 28, 2018 13:35
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mjhea0/64a19b32ffe061ae637eaf598e3b9d2a to your computer and use it in GitHub Desktop.
Save mjhea0/64a19b32ffe061ae637eaf598e3b9d2a to your computer and use it in GitHub Desktop.
dask example
import time
import pandas as pd
import dask.dataframe as ddf
def benchmark(function, repetitions, name):
    """Time one call of ``function(repetitions)`` and print the elapsed seconds.

    Args:
        function: Callable that accepts the repetition count as its only
            argument; its return value is discarded.
        repetitions: Value forwarded unchanged to ``function``.
        name: Label appended to the printed report line.
    """
    # perf_counter() is monotonic and high-resolution. time.time() follows the
    # wall clock, which can be stepped (NTP or manual adjustment) while the
    # workload runs and would skew, or even negate, the measured interval.
    start = time.perf_counter()
    function(repetitions)
    elapsed = time.perf_counter() - start
    print('{0} seconds for {1}'.format(elapsed, name))
def test_pandas(repetitions):
    """Pure-pandas workload: load the CSV, add a bonus column, replicate, average.

    Args:
        repetitions: Number of copies of the loaded frame to stack together.

    Returns:
        Mean of the ``salary`` column over the stacked frame.
    """
    people = pd.read_csv('random_people.csv')
    # The bonus column is derived once on the small frame, before replication.
    people['bonus'] = people['salary'] * .5
    stacked = pd.concat([people] * repetitions)
    return stacked['salary'].mean()
def test_pandas_dask(repetitions):
    """Dask workload: load with pandas, replicate, then process as a dask frame.

    Args:
        repetitions: Number of copies of the loaded frame to stack together.

    Returns:
        Mean of the ``salary`` column, computed through dask.
    """
    people = pd.read_csv('random_people.csv')
    stacked = pd.concat([people] * repetitions)
    # NOTE(review): npartitions is the number of partitions the frame is split
    # into, not a core count; 3 is the original author's choice - tune as needed.
    partitioned = ddf.from_pandas(stacked, npartitions=3)
    partitioned['bonus'] = partitioned['salary'] * .5
    mean_salary = partitioned['salary'].mean()
    return mean_salary.compute()
# Run both workloads at 1,000 / 10,000 / 100,000 repetitions; a blank-line
# separator is printed between size groups, but not after the last one.
for position, (size, label) in enumerate(
        ((1000, '1k'), (10000, '10k'), (100000, '100k'))):
    if position:
        print('\n')
    for workload in (test_pandas, test_pandas_dask):
        benchmark(workload, size, f'{workload.__name__} {label}')
name surname salary
0 Henry Joneson 5000
1 Albert Goodman 10000
2 William Goodman 10000
3 John Joneson 10000
4 Albert Black 10000
5 Henry Joneson 12000
6 Richard Green 5500
7 Henry Joneson 11000
8 Henry Goodman 12000
9 Albert Joneson 11000
10 William Joneson 10000
11 John White 10000
12 Henry Black 11000
13 Albert Goodman 10000
14 Richard Green 5500
15 Henry Black 13500
16 Richard White 11000
17 Albert Black 5500
18 Henry Green 10000
19 Albert Joneson 11000
20 William Goodman 12000
21 William Goodman 5000
22 John Green 9500
23 John Black 13500
24 Richard Green 13500
25 Henry Joneson 12000
26 Henry Goodman 10000
27 John Joneson 9500
28 Henry Goodman 11000
29 William Green 12000
30 Henry Goodman 10000
31 Richard Black 10000
32 Richard Joneson 5500
33 Richard Joneson 5000
34 Henry Black 9500
35 John White 13500
36 Henry Green 11000
37 John Black 5500
38 William Green 12000
39 Albert Green 10000
40 Richard Joneson 9500
41 William Joneson 12000
42 John Joneson 10000
43 William Black 10000
44 Albert Black 12000
45 John Goodman 13500
46 John Joneson 10000
47 John Joneson 9500
48 Richard Black 9500
49 Albert White 10000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment