-
-
Save eddienko/54a7d7f105bd09bd48673b1fb1ff74ef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
distributed: | |
version: 2 | |
scheduler: | |
bandwidth: 1000000000 # 1 GB/s estimated worker-worker bandwidth | |
worker: | |
memory: | |
target: 0.90 # target fraction to stay below | |
spill: False # False disables spilling to disk (otherwise: fraction at which we spill) | |
pause: 0.80 # fraction at which we pause worker threads | |
terminate: 0.95 # fraction at which we terminate the worker | |
comm: | |
compression: null | |
jobqueue: | |
pbs: | |
name: dask-worker | |
threads: 9 | |
processes: 4 | |
memory: 36GB | |
interface: ib0 | |
local-directory: $TMPDIR | |
queue: regular | |
project: UCLB0022 | |
walltime: '00:30:00' | |
resource-spec: select=9:ncpus=4:mem=16G |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Launch a modest cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask\n", | |
"dask.__version__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask_jobqueue import PBSCluster\n", | |
"cluster = PBSCluster()\n", | |
"cluster.scale(50)\n", | |
"cluster" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from dask.distributed import Client\n", | |
"client = Client(cluster)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Create a Dask Dataframe" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.dataframe as dd\n", | |
"\n", | |
"df = dd.demo.make_timeseries(start='2000-01-01',\n", | |
" end='2010-12-31',\n", | |
" dtypes={'x': float, 'y': float, 'id': int},\n", | |
" freq='10ms',\n", | |
" partition_freq='24h')\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df = df.persist()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%time df.groupby('id')[['x', 'y']].mean().compute()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.x.rolling('1min').std().loc['2000-01-02':'2010-12-30'].idxmax().compute()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import dask.array as da\n", | |
"u, s, v = da.linalg.svd(df.values + 1)\n", | |
"s" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"ss = s.persist()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment