Created
June 25, 2018 18:01
-
-
Save mrocklin/be0f3e674a76944a8c91e6a9610ea9e0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
distributed: | |
version: 2 | |
scheduler: | |
bandwidth: 1000000000 # 1 GB/s estimated worker-worker bandwidth | |
worker: | |
memory: | |
target: 0.90 # target fraction to stay below | |
spill: False # fraction at which we spill to disk (False disables this threshold) | |
pause: 0.80 # fraction at which we pause worker threads | |
terminate: 0.95 # fraction at which we terminate the worker | |
comm: | |
compression: null | |
jobqueue: | |
pbs: | |
name: dask-worker | |
threads: 9 | |
processes: 4 | |
memory: 36GB | |
interface: ib0 | |
local-directory: $TMPDIR | |
queue: regular | |
project: UCLB0022 | |
walltime: '00:30:00' | |
resource-spec: select=9:ncpus=4:mem=16G |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Launch a modest cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask\n", | |
"dask.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask_jobqueue import PBSCluster\n", | |
"cluster = PBSCluster()\n", | |
"cluster.scale(50)\n", | |
"cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"client = Client(cluster)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Create a Dask Dataframe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.dataframe as dd\n", | |
"\n", | |
"df = dd.demo.make_timeseries(start='2000-01-01',\n", | |
" end='2010-12-31',\n", | |
" dtypes={'x': float, 'y': float, 'id': int},\n", | |
" freq='10ms',\n", | |
" partition_freq='24h')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = df.persist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%time df.groupby('id')[['x', 'y']].mean().compute()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.x.rolling('1min').std().loc['2000-01-02':'2010-12-30'].idxmax().compute()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.array as da\n", | |
"u, s, v = da.linalg.svd(df.values + 1)\n", | |
"s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ss = s.persist()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment