best_try_for_layout.ipynb
@ncclementi · Last active May 14, 2021 21:55
{
"cells": [
{
"metadata": {
"trusted": true
},
"id": "4ae2fbf9",
"cell_type": "code",
"source": "import dask\nimport dask.array as da\nimport distributed",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "41544160",
"cell_type": "code",
"source": "client = distributed.Client()\nclient",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "8c057a76",
"cell_type": "code",
"source": "x = da.random.random((10000, 10000))\ny = x + x.T - x.mean(axis=0) #+ x.var(axis=0)\ny = y.persist()\ndistributed.wait(y);",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "984b475b",
"cell_type": "code",
"source": "df = dask.datasets.timeseries(partition_freq=\"1H\")\ndf = df.groupby(df.index.minute).agg({\n \"name\": \"max\",\n \"x\": \"min\",\n \"y\": \"mean\",\n}).persist()\ndistributed.wait(df);",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "be66b8d0",
"cell_type": "code",
"source": "groups = client.cluster.scheduler.task_groups",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "cb671b16",
"cell_type": "code",
"source": "groups",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "3096b8f8",
"cell_type": "code",
"source": "dependencies = {\n k: [ds.name for ds in ts.dependencies if ds.name != k]\n for k, ts in groups.items()\n }",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "b0d206be",
"cell_type": "code",
"source": "dependencies",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "8842c06a",
"cell_type": "code",
"source": "def get_depth(deps, key):\n if len(deps[key]) == 0:\n return 0\n return max(get_depth(deps, val) + 1 for val in deps[key])",
"execution_count": null,
"outputs": []
},
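{
"metadata": {
"trusted": false
},
"cell_type": "markdown",
"source": "Quick sanity check of `get_depth` on a small hand-written dependency dict (the layer names below are made up for illustration, not real task-group keys): a root has depth 0 and every other layer sits one level above its deepest dependency."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Hypothetical toy dependency dict: 'root' has no dependencies,\n# 'mid' depends on 'root', and 'top' depends on both.\ntoy_deps = {\"root\": [], \"mid\": [\"root\"], \"top\": [\"root\", \"mid\"]}\n{k: get_depth(toy_deps, k) for k in toy_deps}  # expected: {'root': 0, 'mid': 1, 'top': 2}",
"execution_count": null,
"outputs": []
},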
{
"metadata": {
"trusted": true
},
"id": "2f798c72",
"cell_type": "code",
"source": "dependencies_depth = {k: get_depth(dependencies, k) for k in dependencies.keys()}",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "909f0d22",
"cell_type": "code",
"source": "dependencies_depth",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "dcbf20b6",
"cell_type": "code",
"source": "dependents = {k: [] for k in dependencies}\n\nfor k, v in dependencies.items():\n for dep in v:\n dependents[dep].append(k) ",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "7a7363fe",
"cell_type": "code",
"source": "dependents",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "2c976191",
"cell_type": "code",
"source": "def toposort_layers(dependencies):\n \"\"\"Sort the layers in a high level graph topologically\n Parameters\n ----------\n hlg : HighLevelGraph\n The high level graph's layers to sort\n Returns\n -------\n sorted: list\n List of layer names sorted topologically\n \"\"\"\n degree = {k: len(v) for k, v in dependencies.items()}\n\n reverse_deps = {k: [] for k in dependencies}\n\n ready = []\n for k, v in dependencies.items():\n for dep in v:\n reverse_deps[dep].append(k) ## this are the dependents\n if not v:\n ready.append(k)\n ret = []\n\n \n while len(ready) > 0:\n layer = ready.pop()\n ret.append(layer)\n for rdep in reverse_deps[layer]:\n degree[rdep] -= 1\n if degree[rdep] == 0:\n ready.append(rdep)\n return ret",
"execution_count": null,
"outputs": []
},
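{
"metadata": {
"trusted": false
},
"cell_type": "markdown",
"source": "`toposort_layers` is essentially Kahn's algorithm driven by the reversed edges. A minimal check on the same hypothetical toy dict: every layer should appear after all of its dependencies."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Same hypothetical toy dict as above\ntoy_deps = {\"root\": [], \"mid\": [\"root\"], \"top\": [\"root\", \"mid\"]}\ntoy_order = toposort_layers(toy_deps)\n# each layer must come after everything it depends on\nassert all(toy_order.index(d) < toy_order.index(k) for k, deps in toy_deps.items() for d in deps)\ntoy_order  # expected: ['root', 'mid', 'top']",
"execution_count": null,
"outputs": []
},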
{
"metadata": {
"trusted": true
},
"id": "917dca72",
"cell_type": "code",
"source": "stack_order = toposort_layers(dependencies)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "664fa0ee",
"cell_type": "code",
"source": "# groups[stack_order[0]].__dict__",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "6ba0c39d",
"cell_type": "code",
"source": "# groups[stack_order[0]].states.values()",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "2dd1da85",
"cell_type": "code",
"source": "# import numpy as np\n\n# np.log(sum(groups[stack_order[0]].states.values()))",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "d7f21e6d",
"cell_type": "code",
"source": "def get_ycoords(stack, depth_dict, dependents_dict):\n ycoords = {}\n stack_it = stack[::-1].copy()\n ynext = 0\n while stack_it:\n tg = stack_it.pop()\n if depth_dict[tg] == 0:\n ycoords[tg] = ynext\n ynext += 1 #maybe use whatever is the size of the bar of the prev root\n\n sort_dependents = [ele for ele in stack if ele in dependents_dict[tg]]\n for dep in sort_dependents:\n if dep not in ycoords:\n #print('Im here')\n ycoords[dep] = ycoords[tg] + sort_dependents.index(dep)\n \n return ycoords",
"execution_count": null,
"outputs": []
},
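{
"metadata": {
"trusted": false
},
"cell_type": "markdown",
"source": "The same hypothetical toy example run through `get_ycoords`: the root gets a fresh row, and its dependents are spread relative to it in stack order."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Hypothetical toy inputs mirroring the real stack / depth / dependents dicts\ntoy_stack = [\"root\", \"mid\", \"top\"]\ntoy_depth = {\"root\": 0, \"mid\": 1, \"top\": 2}\ntoy_dependents = {\"root\": [\"mid\", \"top\"], \"mid\": [\"top\"], \"top\": []}\nget_ycoords(toy_stack, toy_depth, toy_dependents)  # expected: {'root': 0, 'mid': 0, 'top': 1}",
"execution_count": null,
"outputs": []
},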
{
"metadata": {
"trusted": true
},
"id": "1b82a925",
"cell_type": "code",
"source": "ycoords = get_ycoords(stack_order, dependencies_depth, dependents)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "0f564cfc",
"cell_type": "code",
"source": "ycoords",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "12bab78d",
"cell_type": "code",
"source": "from distributed.utils import color_of\nimport collections\nimport numpy as np",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "8a7ed09c",
"cell_type": "code",
"source": "stack_order",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "c3e92e09",
"cell_type": "code",
"source": "data_nodes = collections.defaultdict(list)\nfor tg in stack_order:\n data_nodes ['index'].append(tg) \n data_nodes ['name'].append(groups[tg].prefix.name)\n data_nodes ['x'].append(dependencies_depth[tg])\n data_nodes ['y'].append(ycoords[tg])\n data_nodes ['color'].append(color_of(groups[tg].prefix.name))\n data_nodes ['start'] += dependencies[tg] #start nodes for edges\n data_nodes ['end'] += [tg] * len(dependencies[tg]) #end nodes for edges\n #data_nodes ['top_bar'].append(ycoords[tg] + sum(groups[tg].all_durations.values()))\n data_nodes ['top_bar'].append(ycoords[tg] + max(np.log(sum(groups[tg].states.values())), 1))\n #data_nodes ['all_durations'].append(sum(groups[tg].all_durations.values()))\n data_nodes ['total_tasks'].append(sum(groups[tg].states.values()))\n data_nodes ['log_total_tasks'].append(np.log(sum(groups[tg].states.values())))",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "202b6d07",
"cell_type": "code",
"source": "data_arrows = {}\n\ndata_arrows['start'] = data_nodes.pop('start')\ndata_arrows['end'] = data_nodes.pop('end')\n\n",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "61e21b2d",
"cell_type": "code",
"source": "data_arrows['xs'] = [dependencies_depth[s] for s in data_arrows['start']]\ndata_arrows['ys'] = [ycoords[s] for s in data_arrows['start']]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "ea6caf38",
"cell_type": "code",
"source": "data_arrows['xe'] = [dependencies_depth[e] for e in data_arrows['end']]\ndata_arrows['ye'] = [ycoords[e] for e in data_arrows['end']]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "6078a3d4",
"cell_type": "code",
"source": "data_arrows",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "f4a6106b",
"cell_type": "code",
"source": "# data['alpha'] += [0.5]*len(stack_order)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "dafa9b79",
"cell_type": "code",
"source": "from bokeh.plotting import figure, show, output_notebook\nfrom bokeh.models import Plot, Arrow, VBar, VeeHead, ColumnDataSource, GraphRenderer, StaticLayoutProvider, HBar, Ellipse, LabelSet\noutput_notebook()",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "cebd3eff",
"cell_type": "code",
"source": "#data",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "4beb29da",
"cell_type": "code",
"source": "#THIS NEEDS TO HAVE A BETTER LOGIC\nname_chop = [] \nfor name in data_nodes['name']:\n if len(name) <= 6:\n name_chop.append(name)\n else:\n name_chop.append(name[:6])",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "03bd3638",
"cell_type": "code",
"source": "data_nodes['name_chop'] = name_chop",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "2da54b26",
"cell_type": "code",
"source": "source_nodes = ColumnDataSource(data_nodes)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "source_arrows = ColumnDataSource(data_arrows)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "6af07761",
"cell_type": "code",
"source": "TOOLTIPS = [\n (\"tg\", \"@name\"),\n (\"num_task\",\"@total_tasks\")\n #(\"all_dur\", \"@all_durations\")\n]",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": true
},
"id": "07f01609",
"cell_type": "code",
"source": "plot = figure( title=\"Graph layout demonstration\", tools=\"\", tooltips=TOOLTIPS ,toolbar_location=None, \n x_range=[-1, 4], y_range=[-1, 4])\n\nplot.square('x', 'y', size=30, color='color', alpha=0.5, source=source_nodes)\n\nplot.add_layout(Arrow(end=VeeHead(size=10), line_color='red', line_alpha=0.5, line_width=2,\n x_start='xs', y_start='ys', x_end='xe', y_end='ye', source=source_arrows))\n\nplot.add_layout(LabelSet(x='x', y='y', text='name_chop',source=source_nodes, \n background_fill_color=None, x_offset=-1))\nshow(plot)",
"execution_count": null,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "markdown",
"source": "We need to think better how to of set root nodes. If we are using bars that represent let's say the amount of tasks (see below) they would overlap if we only offset them by a fix amount (in this case +1)"
},
{
"metadata": {
"trusted": true
},
"id": "262a1d0d",
"cell_type": "code",
"source": "plot_2 = figure( title=\"Graph layout demonstration v2\", tools=\"\", tooltips=TOOLTIPS ,toolbar_location=None, \n x_range=[-1, 4], y_range=[-1, 8])\n\nplot_2.vbar(x=\"x\", top=\"top_bar\", bottom='y', width=0.3, fill_color=\"color\", alpha=0.5, source=source_nodes)\n\n\nplot_2.add_layout(Arrow(end=VeeHead(size=10), line_color='red', line_alpha=0.5, line_width=2,\n x_start='xs', y_start='ys', x_end='xe', y_end='ye', source=source_arrows))\n\nplot_2.add_layout(LabelSet(x='x', y='y', text='name_chop',source=source_nodes, \n background_fill_color=None, x_offset=-1))\nshow(plot_2)",
"execution_count": null,
"outputs": []
},
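{
"metadata": {
"trusted": false
},
"cell_type": "markdown",
"source": "A possible direction for the offset problem described above (just a sketch, and the helper name below is made up, not part of the dashboard code): instead of bumping `ynext` by a fixed +1, accumulate the height of the bars already placed, reusing the log-of-task-count height from the `top_bar` column, so a tall root pushes the next root further up and consecutive bars cannot overlap."
},
{
"metadata": {
"trusted": true
},
"cell_type": "code",
"source": "# Sketch only: offset each root by the accumulated height of the bars placed so far,\n# reusing the log-of-task-count height used for 'top_bar' above.\n# (Dependents would still need to be placed relative to these roots.)\ndef get_root_ycoords_stacked(stack, depth_dict, groups, gap=0.5):\n    ycoords = {}\n    ynext = 0.0\n    for tg in stack:\n        if depth_dict[tg] == 0:\n            ycoords[tg] = ynext\n            bar_height = max(np.log(sum(groups[tg].states.values())), 1)\n            ynext += bar_height + gap  # leave a gap so consecutive root bars never overlap\n    return ycoords\n\nget_root_ycoords_stacked(stack_order, dependencies_depth, groups)",
"execution_count": null,
"outputs": []
},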
{
"metadata": {
"trusted": true
},
"id": "7e0fc9dc",
"cell_type": "code",
"source": "",
"execution_count": null,
"outputs": []
}
],
"metadata": {
"gist": {
"id": "",
"data": {
"description": "best_try_for_layout.ipynb",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.8",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
}
},
"nbformat": 4,
"nbformat_minor": 5
}