Skip to content

Instantly share code, notes, and snippets.

@crusaderky
Created March 3, 2023 16:36
Show Gist options
  • Save crusaderky/a97f870c51260e63a1c14c20b762f666 to your computer and use it in GitHub Desktop.
Save crusaderky/a97f870c51260e63a1c14c20b762f666 to your computer and use it in GitHub Desktop.
Fine performance metrics demo (distributed#7586)
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "0652f264-b2c3-43b2-9310-7c00e50e7cdc",
"metadata": {},
"source": [
"# Feature Engineering In Advance of HPO\n",
"\n",
"Demo notebook for https://github.com/dask/distributed/pull/7586, showcasing task I/O and spilling.\n",
"The workflow being executed is a copy-paste of https://github.com/coiled/dask-xgboost-nyctaxi/blob/main/Feature%20Engineering.ipynb\n",
"\n",
"The only difference is that all calls to `optimize()` have been replaced with `(optimize_graph=False)` in order to have clearer task prefixes in the metrics.\n",
"\n",
"In order to meter parquet I/O, you need to apply a 3-line patch to dask/dask:"
]
},
{
"cell_type": "raw",
"id": "e0f617bd",
"metadata": {},
"source": [
"--- a/dask/dataframe/io/parquet/core.py\n",
"+++ b/dask/dataframe/io/parquet/core.py\n",
"@@ -27,6 +27,8 @@ from dask.highlevelgraph import HighLevelGraph\n",
" from dask.layers import DataFrameIOLayer\n",
" from dask.utils import apply, import_required, natural_sort_key, parse_bytes\n",
" \n",
"+from distributed.metrics import context_meter\n",
"+\n",
" __all__ = (\"read_parquet\", \"to_parquet\")\n",
" \n",
" NONE_LABEL = \"__null_dask_index__\"\n",
"@@ -158,6 +160,7 @@ class ToParquetFunctionWrapper:\n",
" self.kwargs_pass,\n",
" )\n",
" \n",
"+ @context_meter.meter(\"I/O\")\n",
" def __call__(self, df, block_index: tuple[int]):\n",
" # Get partition index from block index tuple\n",
" part_i = block_index[0]\n",
"@@ -643,6 +646,7 @@ def check_multi_support(engine):\n",
" return hasattr(engine, \"multi_support\") and engine.multi_support()\n",
" \n",
" \n",
"+@context_meter.meter(\"I/O\")\n",
" def read_parquet_part(\n",
" fs, engine, meta, part, columns, index, use_nullable_dtypes, kwargs\n",
" ):"
]
},
{
"cell_type": "markdown",
"id": "b6ff1c43",
"metadata": {},
"source": [
"### Start Coiled cluster"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "3b929987-5089-4c13-be35-c1812d65fbec",
"metadata": {},
"outputs": [],
"source": [
"import time\n",
"\n",
"import coiled\n",
"import dask.config\n",
"import dask.dataframe as dd\n",
"import distributed\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "3abab0dd-e0ed-4763-b740-5bc4cf0ecb0f",
"metadata": {
"scrolled": false,
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Processing /home/crusaderky/github/dask\n",
" Installing build dependencies: started\n",
" Installing build dependencies: finished with status 'done'\n",
" Getting requirements to build wheel: started\n",
" Getting requirements to build wheel: finished with status 'done'\n",
" Preparing metadata (pyproject.toml): started\n",
" Preparing metadata (pyproject.toml): finished with status 'done'\n",
"Building wheels for collected packages: dask\n",
" Building wheel for dask (pyproject.toml): started\n",
" Building wheel for dask (pyproject.toml): finished with status 'done'\n",
" Created wheel for dask: filename=dask-2023.3.0+4.g3e05886ec-py3-none-any.whl size=1162552 sha256=60b74404076badeaab0d72c6d0d1d493b01e2cb434e094e9244f97ab2b8117f8\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-eb_uhmce/wheels/eb/1a/fb/1189c654380fe182ec1b147cbdd21a7a0bb67bd5c8a8a6d2b8\n",
"Successfully built dask\n",
"Processing /home/crusaderky/github/distributed\n",
" Installing build dependencies: started\n",
" Installing build dependencies: finished with status 'done'\n",
" Getting requirements to build wheel: started\n",
" Getting requirements to build wheel: finished with status 'done'\n",
" Preparing metadata (pyproject.toml): started\n",
" Preparing metadata (pyproject.toml): finished with status 'done'\n",
"Building wheels for collected packages: distributed\n",
" Building wheel for distributed (pyproject.toml): started\n",
" Building wheel for distributed (pyproject.toml): finished with status 'done'\n",
" Created wheel for distributed: filename=distributed-2023.3.0+40.g0b73bfa2f-py3-none-any.whl size=1295944 sha256=33bc36aa62810c68cae191564cf483bf870532b985b28a3c84216f2d0e86e98e\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-e5sk3vs6/wheels/60/25/b9/915660abc977d0d56038cb407d33358095856354b234ca3039\n",
"Successfully built distributed\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">╭─────────────────────────────────────── <span style=\"color: #008000; text-decoration-color: #008000; font-weight: bold\">Package Sync Notes</span> ───────────────────────────────────────╮\n",
"│ ╷ │\n",
"│ <span style=\"font-weight: bold\"> Package </span>│<span style=\"font-weight: bold\"> Note </span> │\n",
"│ ╶──────────────────────┼───────────────────────────────────────────────────────────────────────╴ │\n",
"│ PyQt5-sip │ Package ignored │\n",
"│ coiled-runtime │ Package ignored │\n",
"│ dask │ Wheel built from /home/crusaderky/github/dask │\n",
"│ distributed │ Wheel built from /home/crusaderky/github/distributed │\n",
"│ libabseil │ Package ignored │\n",
"│ openssl │ Package ignored │\n",
"│ pyqt5-sip │ Package ignored │\n",
"│ ╵ │\n",
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
"</pre>\n"
],
"text/plain": [
"╭─────────────────────────────────────── \u001b[1;32mPackage Sync Notes\u001b[0m ───────────────────────────────────────╮\n",
"│ ╷ │\n",
"│ \u001b[1m \u001b[0m\u001b[1mPackage \u001b[0m\u001b[1m \u001b[0m│\u001b[1m \u001b[0m\u001b[1mNote \u001b[0m\u001b[1m \u001b[0m │\n",
"│ ╶──────────────────────┼───────────────────────────────────────────────────────────────────────╴ │\n",
"│ PyQt5-sip │ Package ignored │\n",
"│ coiled-runtime │ Package ignored │\n",
"│ dask │ Wheel built from /home/crusaderky/github/dask │\n",
"│ distributed │ Wheel built from /home/crusaderky/github/distributed │\n",
"│ libabseil │ Package ignored │\n",
"│ openssl │ Package ignored │\n",
"│ pyqt5-sip │ Package ignored │\n",
"│ ╵ │\n",
"╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": [
"\u001b[?25l"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "d751b205ddae43e69fb4267542ee89c4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">\n",
"</pre>\n"
],
"text/plain": [
"\n",
"\u001b[?25h"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/crusaderky/github/distributed/distributed/client.py:1361: VersionMismatchWarning: Mismatched versions found\n",
"\n",
"+---------+--------+-----------+---------+\n",
"| Package | Client | Scheduler | Workers |\n",
"+---------+--------+-----------+---------+\n",
"| pandas | 1.5.2 | 1.5.3 | 1.5.3 |\n",
"+---------+--------+-----------+---------+\n",
" warnings.warn(version_module.VersionMismatchWarning(msg[0][\"warning\"]))\n"
]
}
],
"source": [
"dask.config.set({\"dataframe.dtype_backend\": \"pyarrow\"})\n",
"\n",
"cluster = coiled.Cluster(\n",
" worker_vm_types=[\"m6i.xlarge\"],\n",
" scheduler_vm_types=[\"m6i.large\"],\n",
" package_sync=True, # align remote packages to local ones\n",
" n_workers=10,\n",
" backend_options={\n",
" \"region\": \"us-east-2\",\n",
" \"multizone\": True,\n",
" \"spot\": True,\n",
" \"spot_on_demand_fallback\": True,\n",
" },\n",
" account=\"dask-engineering\",\n",
")\n",
"client = distributed.Client(cluster)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "23735db0",
"metadata": {},
"outputs": [],
"source": [
"class SetPandasOptions(distributed.WorkerPlugin):\n",
"    \"\"\"Worker plugin that switches pandas string storage to PyArrow.\"\"\"\n",
"\n",
"    # Stable plugin name, so that re-running this cell does not register an\n",
"    # additional copy of the plugin under a freshly generated random name.\n",
"    name = \"set-pandas-options\"\n",
"\n",
"    def setup(self, worker):\n",
"        pd.set_option(\"string_storage\", \"pyarrow\")\n",
"\n",
"\n",
"pd.set_option(\"string_storage\", \"pyarrow\")  # Set on the client\n",
"_ = client.register_worker_plugin(SetPandasOptions())  # Set on the workers"
]
},
{
"cell_type": "markdown",
"id": "d46702f2",
"metadata": {},
"source": [
"### Sample start time"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1f98607f",
"metadata": {},
"outputs": [],
"source": [
"# Restart the workers before starting the clock; presumably this also resets\n",
"# the metrics they accumulated so far (TODO confirm).\n",
"client.restart()\n",
"t0 = time.perf_counter()"
]
},
{
"cell_type": "markdown",
"id": "1dd461c3-e551-420a-a3e1-1ca2515bb613",
"metadata": {},
"source": [
"### Run workflow"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "edbb2133",
"metadata": {},
"outputs": [],
"source": [
"ddf = dd.read_parquet(\n",
" \"s3://coiled-datasets/prefect-dask/nyc-uber-lyft/processed_data.parquet\",\n",
" index=False,\n",
" columns=[\n",
" \"hvfhs_license_num\",\n",
" \"PULocationID\",\n",
" \"DOLocationID\",\n",
" \"trip_miles\",\n",
" \"trip_time\",\n",
" \"tolls\",\n",
" \"congestion_surcharge\",\n",
" \"airport_fee\",\n",
" \"wav_request_flag\",\n",
" \"on_scene_datetime\",\n",
" \"pickup_datetime\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "012ff7aa",
"metadata": {},
"outputs": [],
"source": [
"ddf = ddf.repartition(partition_size=\"100MB\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ddb6ea59",
"metadata": {},
"outputs": [],
"source": [
"ddf = ddf.assign(\n",
" accessible_vehicle=ddf.on_scene_datetime.isnull(),\n",
" pickup_month=ddf.pickup_datetime.dt.month,\n",
" pickup_dow=ddf.pickup_datetime.dt.dayofweek,\n",
" pickup_hour=ddf.pickup_datetime.dt.hour,\n",
")\n",
"ddf = ddf.drop(columns=[\"on_scene_datetime\", \"pickup_datetime\"])\n",
"ddf[\"airport_fee\"] = ddf[\"airport_fee\"].replace(\"None\", 0).astype(float).fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "29bd3401",
"metadata": {},
"outputs": [],
"source": [
"ddf = ddf.dropna(how=\"any\")\n",
"lower_bound = 0\n",
"Q3 = ddf[\"trip_time\"].quantile(0.75)\n",
"upper_bound = Q3 + (1.5 * (Q3 - lower_bound))\n",
"ddf = ddf.loc[(ddf[\"trip_time\"] >= lower_bound) & (ddf[\"trip_time\"] <= upper_bound)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "30bd3bf0-f8c2-4240-b12b-8ee6e4c772c0",
"metadata": {},
"outputs": [],
"source": [
"taxi_zone_lookup = pd.read_csv(\n",
" \"s3://coiled-datasets/prefect-dask/nyc-uber-lyft/taxi+_zone_lookup.csv\",\n",
" usecols=[\"LocationID\", \"Borough\"],\n",
")\n",
"BOROUGH_MAPPING = {\n",
" \"Manhattan\": \"Superborough 1\",\n",
" \"Bronx\": \"Superborough 1\",\n",
" \"EWR\": \"Superborough 1\",\n",
" \"Brooklyn\": \"Superborough 2\",\n",
" \"Queens\": \"Superborough 2\",\n",
" \"Staten Island\": \"Superborough 3\",\n",
" \"Unknown\": \"Unknown\",\n",
"}\n",
"\n",
"taxi_zone_lookup[\"Superborough\"] = [\n",
" BOROUGH_MAPPING[k] for k in taxi_zone_lookup[\"Borough\"]\n",
"]\n",
"taxi_zone_lookup = taxi_zone_lookup.astype(\n",
" {\"Borough\": \"string[pyarrow]\", \"Superborough\": \"string[pyarrow]\"}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "36ea15a2",
"metadata": {},
"outputs": [],
"source": [
"ddf = dd.merge(\n",
" ddf,\n",
" taxi_zone_lookup,\n",
" left_on=\"PULocationID\",\n",
" right_on=\"LocationID\",\n",
" how=\"inner\",\n",
")\n",
"ddf = ddf.rename(columns={\"Borough\": \"PUBorough\", \"Superborough\": \"PUSuperborough\"})\n",
"ddf = ddf.drop(columns=\"LocationID\")\n",
"\n",
"ddf = dd.merge(\n",
" ddf,\n",
" taxi_zone_lookup,\n",
" left_on=\"DOLocationID\",\n",
" right_on=\"LocationID\",\n",
" how=\"inner\",\n",
")\n",
"ddf = ddf.rename(columns={\"Borough\": \"DOBorough\", \"Superborough\": \"DOSuperborough\"})\n",
"ddf = ddf.drop(columns=\"LocationID\")\n",
"\n",
"ddf[\"PUSuperborough_DOSuperborough\"] = ddf.PUSuperborough.str.cat(\n",
" ddf.DOSuperborough, sep=\"-\"\n",
")\n",
"ddf = ddf.drop(columns=[\"PUSuperborough\", \"DOSuperborough\"])"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "04b66acb",
"metadata": {},
"outputs": [],
"source": [
"categories = [\n",
" \"hvfhs_license_num\",\n",
" \"PULocationID\",\n",
" \"DOLocationID\",\n",
" \"wav_request_flag\",\n",
" \"accessible_vehicle\",\n",
" \"pickup_month\",\n",
" \"pickup_dow\",\n",
" \"pickup_hour\",\n",
" \"PUBorough\",\n",
" \"DOBorough\",\n",
" \"PUSuperborough_DOSuperborough\",\n",
"]\n",
"\n",
"ddf = ddf.astype(dict.fromkeys(categories, \"category\"))\n",
"ddf = ddf.persist(optimize_graph=False)\n",
"ddf = ddf.categorize(categories)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "0334799f",
"metadata": {},
"outputs": [],
"source": [
"ddf = ddf.persist(optimize_graph=False)\n",
"ddf = ddf.repartition(partition_size=\"100MB\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "6ae76b35",
"metadata": {},
"outputs": [],
"source": [
"ddf = ddf.astype(\n",
" {\n",
" col: pd.CategoricalDtype(dt.categories.astype(object))\n",
" for col, dt in ddf.dtypes.items()\n",
" if isinstance(dt, pd.CategoricalDtype)\n",
" and dt.categories.dtype == \"string[pyarrow]\"\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4d929b09",
"metadata": {},
"outputs": [],
"source": [
"# Write step, disabled in this demo:\n",
"# ddf.to_parquet(\n",
"#     \"s3://guido-scratch/feature_table.parquet\",\n",
"#     overwrite=True,\n",
"# )\n",
"# Compute every partition and discard the result instead. The collection must be\n",
"# persisted first: wait() on a lazy collection only waits for the futures that\n",
"# are already embedded in its graph and does not trigger the pending stages.\n",
"_ = distributed.wait(\n",
"    ddf.map_partitions(lambda x: None).persist(optimize_graph=False)\n",
")"
]
},
{
"cell_type": "markdown",
"id": "668b6cf5",
"metadata": {},
"source": [
"### Sample end of computation from the client and retrieve the metrics"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "092121aa",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"366.0367508749987"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"t1 = time.perf_counter()\n",
"metrics = client.run(lambda dask_worker: dask_worker.digests_total)\n",
"t1 - t0"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "68922a18-0f7b-465f-a567-4762d0023ec6",
"metadata": {},
"outputs": [],
"source": [
"client.shutdown()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "b7382504",
"metadata": {},
"outputs": [],
"source": [
"pd.options.display.max_rows = None  # display every row of the metrics table\n",
"\n",
"# Flatten {worker address: {metric key: value}} into one row per worker and metric.\n",
"rows = []\n",
"for addr, digests_total in metrics.items():\n",
"    for k, v in digests_total.items():\n",
"        if isinstance(k, str):\n",
"            continue  # only tuple-keyed metrics are tabulated\n",
"        if len(k) == 3:\n",
"            # 3-element keys carry no task prefix; pad with an empty one\n",
"            activity, action, unit = k\n",
"            rows.append([addr, activity, \"\", action, unit, v])\n",
"        elif len(k) == 4:\n",
"            rows.append([addr, *k, v])\n",
"        else:\n",
"            raise ValueError(f\"Unexpected metric key: {k!r}\")\n",
"df = pd.DataFrame(\n",
"    rows, columns=[\"worker\", \"activity\", \"prefix\", \"action\", \"unit\", \"value\"]\n",
")\n",
"# Sum over workers to obtain cluster-wide totals per metric\n",
"df = df.groupby([\"activity\", \"prefix\", \"action\", \"unit\"]).sum(numeric_only=True)"
]
},
{
"cell_type": "markdown",
"id": "6f406560",
"metadata": {},
"source": [
"### Full metrics, aggregated across workers"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "60255732",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>value</th>\n",
" </tr>\n",
" <tr>\n",
" <th>activity</th>\n",
" <th>prefix</th>\n",
" <th>action</th>\n",
" <th>unit</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"275\" valign=\"top\">execute</th>\n",
" <th rowspan=\"6\" valign=\"top\">add</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.963000e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.400000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>5.759396e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>2.852000e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.502939e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">and</th>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>2.106854e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.231467e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-read</th>\n",
" <th>bytes</th>\n",
" <td>3.561756e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.760000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>4.465114e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.078697e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.456000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>8.946212e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>3.873119e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.790220e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">assign</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>5.037202e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>6.600524e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.025898e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5.600000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>5.690627e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>8.231894e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.039200e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.446904e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>9.702521e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>8.564522e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>5.076369e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">astype</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.387048e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>2.049516e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.732000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.104970e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>4.481759e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>6.304594e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">categorize_block</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>4.063422e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.502900e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>5.076183e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.189111e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>5.199587e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">drop_by_shallow_copy</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>5.581967e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.662685e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.039745e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>5.186886e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>7.398934e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.464000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.789927e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.816554e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.469329e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>6.786118e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">dropna</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>1.379998e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.179858e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>3.136404e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.700000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>1.615429e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.164748e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.732000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>3.533512e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>3.112212e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>7.334023e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>6.504962e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">dt-dayofweek</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.171772e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.743413e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>4.081135e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>7.334018e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.022717e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">dt-hour</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>3.543061e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.280281e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>7.677210e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>4.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>3.805964e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.743413e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>3.615897e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>7.050185e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.193667e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.932684e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">dt-month</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>3.132066e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.075102e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>7.230944e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>3.577726e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.743413e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>4.042804e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.749212e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.166762e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.556672e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">fillna</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>2.103873e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.057908e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>6.552972e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>3.389299e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.743413e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>2.228680e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>7.386675e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>9.035824e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>9.276757e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">from_pandas</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.684000e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.270471e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>3.502000e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.437939e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">ge</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>8.832241e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.445263e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.819005e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>9.253741e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.176653e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>2.528699e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.567120e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.376355e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.409568e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">get-categories-agg</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.408131e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>2.398915e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>9.250000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.943847e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>4.902417e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>4.559618e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">get-categories-chunk</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.088719e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.502901e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>4.001039e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.461580e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>3.922195e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">getitem</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>1.663258e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>6.212383e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>3.115353e+07</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>2.198169e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>7.733279e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5.196000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.008451e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>4.681260e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.541796e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.097014e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">isnull</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.831830e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.743413e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.839326e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.916485e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.843588e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">le</th>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>2.328353e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.452431e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-read</th>\n",
" <th>bytes</th>\n",
" <td>5.173490e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.920000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>6.826033e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.015780e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.440000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>7.423455e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>2.143544e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>3.932911e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"15\" valign=\"top\">loc-series</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>9.286848e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>6.460255e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.848922e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-read</th>\n",
" <th>bytes</th>\n",
" <td>1.121431e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.970000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>1.038736e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.866309e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>1.739051e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>7.417926e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.435000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>9.591003e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>2.044885e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>9.156853e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.374590e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">merge_chunk</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>6.296406e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>4.459591e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.373803e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>6.721934e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>2.631508e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.464000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.961889e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.260667e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>8.940670e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.163169e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">mul</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.368600e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>3.200000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>4.977354e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>3.760000e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.793609e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">quantiles</th>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>1.620960e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>3.868425e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-read</th>\n",
" <th>bytes</th>\n",
" <td>1.234200e+04</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.300000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>1.027950e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.176662e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.699000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.844720e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.946719e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>7.694072e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">read-parquet</th>\n",
" <th>I/O</th>\n",
" <th>seconds</th>\n",
" <td>2.280538e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>1.787037e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>2.358927e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>3.919784e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.100000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>2.059484e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>4.874290e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>4.412802e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>0.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.003523e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">rename</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>1.310685e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>4.025434e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>2.735803e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.200000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>2.001524e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>3.723729e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.732000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.429023e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>2.803826e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>2.028315e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.777587e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">repartition</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>2.022075e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.382049e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>4.198203e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.700000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>2.140520e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>8.198742e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.294000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>2.047477e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>3.934864e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.999167e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.744740e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">repartition-split</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>2.732344e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.215349e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>5.980626e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.400000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>3.062755e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>4.649859e+11</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.294000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.319431e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>7.675315e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.208843e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.789240e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">replace</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.007822e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>1.359723e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.772804e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>4.456288e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>1.729192e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"11\" valign=\"top\">split</th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>2.689857e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>8.523750e-04</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>6.223999e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>3.450000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>3.094027e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>4.098688e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>4.500000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>5.599360e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>6.142642e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.445540e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.589523e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">str_cat</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.654157e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>7.512703e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.732000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>6.471378e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>6.781931e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>9.797700e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">sub</th>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>3.815400e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>3.200000e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.622431e-03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>5.666000e-06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.032163e-05</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"7\" valign=\"top\">total_mem_usage</th>\n",
" <th>I/O</th>\n",
" <th>seconds</th>\n",
" <td>2.919971e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>3.251624e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.502950e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>8.660000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>1.538037e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-cpu</th>\n",
" <th>seconds</th>\n",
" <td>1.386170e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>thread-noncpu</th>\n",
" <th>seconds</th>\n",
" <td>2.262479e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"12\" valign=\"top\">gather-dep</th>\n",
" <th rowspan=\"12\" valign=\"top\"></th>\n",
" <th>busy</th>\n",
" <th>seconds</th>\n",
" <td>1.905684e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>cancelled</th>\n",
" <th>seconds</th>\n",
" <td>2.891501e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>6.130827e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>1.296874e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.180477e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.237622e+08</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>7.000000e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>6.144308e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>missing</th>\n",
" <th>seconds</th>\n",
" <td>2.871231e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>network</th>\n",
" <th>seconds</th>\n",
" <td>1.084494e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>other</th>\n",
" <th>seconds</th>\n",
" <td>7.154693e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.452420e-02</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"10\" valign=\"top\">get-data</th>\n",
" <th rowspan=\"10\" valign=\"top\"></th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>1.499470e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>decompress</th>\n",
" <th>seconds</th>\n",
" <td>8.457677e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>deserialize</th>\n",
" <th>seconds</th>\n",
" <td>1.146060e-01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-read</th>\n",
" <th>bytes</th>\n",
" <td>1.298182e+09</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.820000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>1.565931e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">memory-read</th>\n",
" <th>bytes</th>\n",
" <td>6.626876e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>4.898000e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>network</th>\n",
" <th>seconds</th>\n",
" <td>1.244312e+03</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>4.049585e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">memory-monitor</th>\n",
" <th rowspan=\"5\" valign=\"top\"></th>\n",
" <th>compress</th>\n",
" <th>seconds</th>\n",
" <td>6.887015e+01</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"3\" valign=\"top\">disk-write</th>\n",
" <th>bytes</th>\n",
" <td>1.118508e+10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>6.300000e+02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>seconds</th>\n",
" <td>9.565273e+00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>serialize</th>\n",
" <th>seconds</th>\n",
" <td>1.538035e+00</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" value\n",
"activity prefix action unit \n",
"execute add deserialize seconds 1.963000e-05\n",
" memory-read bytes 6.400000e+01\n",
" count 2.000000e+00\n",
" other seconds 5.759396e-04\n",
" thread-cpu seconds 2.852000e-06\n",
" thread-noncpu seconds 1.502939e-05\n",
" and decompress seconds 2.106854e+01\n",
" deserialize seconds 8.231467e-02\n",
" disk-read bytes 3.561756e+09\n",
" count 2.760000e+02\n",
" seconds 4.465114e+00\n",
" memory-read bytes 1.078697e+10\n",
" count 1.456000e+03\n",
" other seconds 8.946212e+01\n",
" thread-cpu seconds 3.873119e+00\n",
" thread-noncpu seconds 1.790220e+01\n",
" assign compress seconds 5.037202e+00\n",
" deserialize seconds 6.600524e-02\n",
" disk-write bytes 1.025898e+09\n",
" count 5.600000e+01\n",
" seconds 5.690627e-01\n",
" memory-read bytes 8.231894e+11\n",
" count 1.039200e+04\n",
" other seconds 1.446904e+02\n",
" serialize seconds 9.702521e-02\n",
" thread-cpu seconds 8.564522e+01\n",
" thread-noncpu seconds 5.076369e+01\n",
" astype deserialize seconds 2.387048e-02\n",
" memory-read bytes 2.049516e+11\n",
" count 1.732000e+03\n",
" other seconds 1.104970e+02\n",
" thread-cpu seconds 4.481759e+02\n",
" thread-noncpu seconds 6.304594e+02\n",
" categorize_block deserialize seconds 4.063422e-02\n",
" memory-read bytes 6.502900e+10\n",
" count 8.660000e+02\n",
" other seconds 5.076183e+00\n",
" thread-cpu seconds 1.189111e+02\n",
" thread-noncpu seconds 5.199587e+01\n",
" drop_by_shallow_copy compress seconds 5.581967e-01\n",
" deserialize seconds 8.662685e-02\n",
" disk-write bytes 1.039745e+08\n",
" count 5.000000e+00\n",
" seconds 5.186886e-02\n",
" memory-read bytes 7.398934e+11\n",
" count 3.464000e+03\n",
" other seconds 1.789927e+02\n",
" serialize seconds 1.816554e-02\n",
" thread-cpu seconds 1.469329e+02\n",
" thread-noncpu seconds 6.786118e+01\n",
" dropna compress seconds 1.379998e+00\n",
" deserialize seconds 2.179858e-02\n",
" disk-write bytes 3.136404e+08\n",
" count 1.700000e+01\n",
" seconds 1.615429e-01\n",
" memory-read bytes 1.164748e+11\n",
" count 1.732000e+03\n",
" other seconds 3.533512e+01\n",
" serialize seconds 3.112212e-02\n",
" thread-cpu seconds 7.334023e+01\n",
" thread-noncpu seconds 6.504962e+01\n",
" dt-dayofweek deserialize seconds 1.171772e-02\n",
" memory-read bytes 1.743413e+10\n",
" count 8.660000e+02\n",
" other seconds 4.081135e+01\n",
" thread-cpu seconds 7.334018e+01\n",
" thread-noncpu seconds 2.022717e+01\n",
" dt-hour compress seconds 3.543061e-01\n",
" deserialize seconds 1.280281e-02\n",
" disk-write bytes 7.677210e+07\n",
" count 4.000000e+00\n",
" seconds 3.805964e-02\n",
" memory-read bytes 1.743413e+10\n",
" count 8.660000e+02\n",
" other seconds 3.615897e+01\n",
" serialize seconds 7.050185e-03\n",
" thread-cpu seconds 6.193667e+01\n",
" thread-noncpu seconds 1.932684e+01\n",
" dt-month compress seconds 3.132066e-01\n",
" deserialize seconds 1.075102e-02\n",
" disk-write bytes 7.230944e+07\n",
" count 3.000000e+00\n",
" seconds 3.577726e-02\n",
" memory-read bytes 1.743413e+10\n",
" count 8.660000e+02\n",
" other seconds 4.042804e+01\n",
" serialize seconds 1.749212e-02\n",
" thread-cpu seconds 6.166762e+01\n",
" thread-noncpu seconds 1.556672e+01\n",
" fillna compress seconds 2.103873e-01\n",
" deserialize seconds 1.057908e-02\n",
" disk-write bytes 6.552972e+07\n",
" count 2.000000e+00\n",
" seconds 3.389299e-02\n",
" memory-read bytes 1.743413e+10\n",
" count 8.660000e+02\n",
" other seconds 2.228680e+01\n",
" serialize seconds 7.386675e-03\n",
" thread-cpu seconds 9.035824e+00\n",
" thread-noncpu seconds 9.276757e+00\n",
" from_pandas deserialize seconds 2.684000e-06\n",
" other seconds 1.270471e-03\n",
" thread-cpu seconds 3.502000e-06\n",
" thread-noncpu seconds 1.437939e-05\n",
" ge compress seconds 8.832241e-02\n",
" deserialize seconds 8.445263e-03\n",
" disk-write bytes 1.819005e+07\n",
" count 1.000000e+00\n",
" seconds 9.253741e-03\n",
" memory-read bytes 1.176653e+10\n",
" count 8.660000e+02\n",
" other seconds 2.528699e+01\n",
" serialize seconds 1.567120e-04\n",
" thread-cpu seconds 1.376355e+00\n",
" thread-noncpu seconds 1.409568e+00\n",
" get-categories-agg deserialize seconds 1.408131e-03\n",
" memory-read bytes 2.398915e+07\n",
" count 9.250000e+02\n",
" other seconds 1.943847e+00\n",
" thread-cpu seconds 4.902417e-01\n",
" thread-noncpu seconds 4.559618e+00\n",
" get-categories-chunk deserialize seconds 2.088719e-02\n",
" memory-read bytes 6.502901e+10\n",
" count 8.660000e+02\n",
" other seconds 4.001039e+01\n",
" thread-cpu seconds 1.461580e+00\n",
" thread-noncpu seconds 3.922195e-01\n",
" getitem compress seconds 1.663258e-01\n",
" deserialize seconds 6.212383e-02\n",
" disk-write bytes 3.115353e+07\n",
" count 3.000000e+00\n",
" seconds 2.198169e-02\n",
" memory-read bytes 7.733279e+11\n",
" count 5.196000e+03\n",
" other seconds 1.008451e+02\n",
" serialize seconds 4.681260e-04\n",
" thread-cpu seconds 6.541796e-01\n",
" thread-noncpu seconds 1.097014e-01\n",
" isnull deserialize seconds 8.831830e-03\n",
" memory-read bytes 1.743413e+10\n",
" count 8.660000e+02\n",
" other seconds 1.839326e+01\n",
" thread-cpu seconds 1.916485e+00\n",
" thread-noncpu seconds 1.843588e+00\n",
" le decompress seconds 2.328353e+01\n",
" deserialize seconds 8.452431e-02\n",
" disk-read bytes 5.173490e+09\n",
" count 2.920000e+02\n",
" seconds 6.826033e+00\n",
" memory-read bytes 6.015780e+09\n",
" count 1.440000e+03\n",
" other seconds 7.423455e+01\n",
" thread-cpu seconds 2.143544e+00\n",
" thread-noncpu seconds 3.932911e+00\n",
" loc-series compress seconds 9.286848e-01\n",
" decompress seconds 6.460255e+01\n",
" deserialize seconds 1.848922e+00\n",
" disk-read bytes 1.121431e+10\n",
" count 2.970000e+02\n",
" seconds 1.038736e+01\n",
" disk-write bytes 1.866309e+08\n",
" count 8.000000e+00\n",
" seconds 1.739051e-01\n",
" memory-read bytes 7.417926e+10\n",
" count 1.435000e+03\n",
" other seconds 9.591003e+01\n",
" serialize seconds 2.044885e-02\n",
" thread-cpu seconds 9.156853e+01\n",
" thread-noncpu seconds 1.374590e+02\n",
" merge_chunk compress seconds 6.296406e-01\n",
" deserialize seconds 4.459591e-02\n",
" disk-write bytes 1.373803e+08\n",
" count 6.000000e+00\n",
" seconds 6.721934e-02\n",
" memory-read bytes 2.631508e+11\n",
" count 3.464000e+03\n",
" other seconds 1.961889e+02\n",
" serialize seconds 1.260667e-02\n",
" thread-cpu seconds 8.940670e+02\n",
" thread-noncpu seconds 1.163169e+03\n",
" mul deserialize seconds 2.368600e-05\n",
" memory-read bytes 3.200000e+01\n",
" count 1.000000e+00\n",
" other seconds 4.977354e-04\n",
" thread-cpu seconds 3.760000e-06\n",
" thread-noncpu seconds 1.793609e-05\n",
" quantiles decompress seconds 1.620960e-04\n",
" deserialize seconds 3.868425e-02\n",
" disk-read bytes 1.234200e+04\n",
" count 3.300000e+01\n",
" seconds 1.027950e-03\n",
" memory-read bytes 1.176662e+10\n",
" count 1.699000e+03\n",
" other seconds 1.844720e+01\n",
" thread-cpu seconds 1.946719e+01\n",
" thread-noncpu seconds 7.694072e+00\n",
" read-parquet I/O seconds 2.280538e+03\n",
" compress seconds 1.787037e+00\n",
" deserialize seconds 2.358927e-02\n",
" disk-write bytes 3.919784e+08\n",
" count 2.100000e+01\n",
" seconds 2.059484e-01\n",
" other seconds 4.874290e+01\n",
" serialize seconds 4.412802e-02\n",
" thread-cpu seconds 0.000000e+00\n",
" thread-noncpu seconds 1.003523e-01\n",
" rename compress seconds 1.310685e+00\n",
" deserialize seconds 4.025434e-02\n",
" disk-write bytes 2.735803e+08\n",
" count 1.200000e+01\n",
" seconds 2.001524e-01\n",
" memory-read bytes 3.723729e+11\n",
" count 1.732000e+03\n",
" other seconds 1.429023e+02\n",
" serialize seconds 2.803826e-02\n",
" thread-cpu seconds 2.028315e+02\n",
" thread-noncpu seconds 2.777587e+02\n",
" repartition compress seconds 2.022075e+00\n",
" deserialize seconds 1.382049e-02\n",
" disk-write bytes 4.198203e+08\n",
" count 2.700000e+01\n",
" seconds 2.140520e-01\n",
" memory-read bytes 8.198742e+10\n",
" count 1.294000e+03\n",
" other seconds 2.047477e+01\n",
" serialize seconds 3.934864e-02\n",
" thread-cpu seconds 6.999167e+01\n",
" thread-noncpu seconds 2.744740e+01\n",
" repartition-split compress seconds 2.732344e+00\n",
" deserialize seconds 8.215349e-03\n",
" disk-write bytes 5.980626e+08\n",
" count 3.400000e+01\n",
" seconds 3.062755e-01\n",
" memory-read bytes 4.649859e+11\n",
" count 1.294000e+03\n",
" other seconds 1.319431e+01\n",
" serialize seconds 7.675315e-02\n",
" thread-cpu seconds 6.208843e-03\n",
" thread-noncpu seconds 2.789240e-02\n",
" replace deserialize seconds 1.007822e-02\n",
" memory-read bytes 1.359723e+10\n",
" count 8.660000e+02\n",
" other seconds 1.772804e+01\n",
" thread-cpu seconds 4.456288e+01\n",
" thread-noncpu seconds 1.729192e+01\n",
" split compress seconds 2.689857e+01\n",
" deserialize seconds 8.523750e-04\n",
" disk-write bytes 6.223999e+09\n",
" count 3.450000e+02\n",
" seconds 3.094027e+00\n",
" memory-read bytes 4.098688e+10\n",
" count 4.500000e+01\n",
" other seconds 5.599360e+00\n",
" serialize seconds 6.142642e-01\n",
" thread-cpu seconds 6.445540e-02\n",
" thread-noncpu seconds 2.589523e-02\n",
" str_cat deserialize seconds 1.654157e-02\n",
" memory-read bytes 7.512703e+10\n",
" count 1.732000e+03\n",
" other seconds 6.471378e+01\n",
" thread-cpu seconds 6.781931e+02\n",
" thread-noncpu seconds 9.797700e+01\n",
" sub deserialize seconds 3.815400e-05\n",
" memory-read bytes 3.200000e+01\n",
" count 1.000000e+00\n",
" other seconds 1.622431e-03\n",
" thread-cpu seconds 5.666000e-06\n",
" thread-noncpu seconds 2.032163e-05\n",
" total_mem_usage I/O seconds 2.919971e+03\n",
" deserialize seconds 3.251624e+00\n",
" memory-read bytes 6.502950e+10\n",
" count 8.660000e+02\n",
" other seconds 1.538037e+01\n",
" thread-cpu seconds 1.386170e+00\n",
" thread-noncpu seconds 2.262479e+00\n",
"gather-dep busy seconds 1.905684e+00\n",
" cancelled seconds 2.891501e+02\n",
" compress seconds 6.130827e-01\n",
" decompress seconds 1.296874e+02\n",
" deserialize seconds 1.180477e+01\n",
" disk-write bytes 1.237622e+08\n",
" count 7.000000e+00\n",
" seconds 6.144308e-02\n",
" missing seconds 2.871231e+00\n",
" network seconds 1.084494e+03\n",
" other seconds 7.154693e+01\n",
" serialize seconds 1.452420e-02\n",
"get-data compress seconds 1.499470e+02\n",
" decompress seconds 8.457677e+00\n",
" deserialize seconds 1.146060e-01\n",
" disk-read bytes 1.298182e+09\n",
" count 2.820000e+02\n",
" seconds 1.565931e+00\n",
" memory-read bytes 6.626876e+10\n",
" count 4.898000e+03\n",
" network seconds 1.244312e+03\n",
" serialize seconds 4.049585e+01\n",
"memory-monitor compress seconds 6.887015e+01\n",
" disk-write bytes 1.118508e+10\n",
" count 6.300000e+02\n",
" seconds 9.565273e+00\n",
" serialize seconds 1.538035e+00"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"id": "a507b4a3",
"metadata": {},
"source": [
"### Breakdown of execute()\n",
"\n",
"We're adding to the metrics an idle time, defined as follows:\n",
"\n",
"```\n",
"idle threads \n",
"= end-to-end runtime as seen from the client \n",
"* number of cluster threads\n",
"- sum of all other timings \n",
"```\n",
"Idle time can be caused by:\n",
"- client<->scheduler comm time\n",
"- initial decision time from the scheduler\n",
"- the algorithm having many serial chokepoints\n",
"- scheduler not being fast enough to push tasks to the workers\n",
"\n",
"The only \"good\" times are thread-cpu and I/O.\n",
"\n",
"We can see that we're wasting **almost half** (~43%) of the cluster time!!!\n",
"\n",
"Besides idle time, other glaring sources of waste are:\n",
"- \"other\": this is highlighting a slow event loop and/or a high overhead from the thread pool.\n",
"- \"thread-noncpu\": either the GIL or I/O - read below for analysis"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "49022bd2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot: ylabel='value'>"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Flatten the (activity, prefix, action, unit) index into regular columns.\n",
"df2 = df.reset_index()\n",
"\n",
"# Total seconds per action, restricted to the \"execute\" activity.\n",
"execute_actions = (\n",
"    df2.loc[lambda m: (m.activity == \"execute\") & (m.unit == \"seconds\")]\n",
"    .groupby(\"action\")\n",
"    .sum(numeric_only=True)\n",
"    .value\n",
")\n",
"\n",
"# Idle time, per the definition in the markdown above:\n",
"# end-to-end runtime * number of cluster threads - sum of all other timings.\n",
"# 40 is the hard-coded number of cluster threads for this run.\n",
"execute_actions[\"idle threads\"] = (t1 - t0) * 40 - sum(execute_actions)\n",
"execute_actions.sort_values().plot.pie()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "4fd6d25c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"action\n",
"serialize 0.000069\n",
"disk-write 0.000354\n",
"deserialize 0.000400\n",
"disk-read 0.001481\n",
"compress 0.003034\n",
"decompress 0.007442\n",
"other 0.109534\n",
"idle threads 0.127391\n",
"thread-noncpu 0.183854\n",
"thread-cpu 0.211252\n",
"I/O 0.355190\n",
"Name: value, dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of the total time taken by each action, smallest first.\n",
"execute_actions.div(execute_actions.sum()).sort_values()"
]
},
{
"cell_type": "markdown",
"id": "20b0138f",
"metadata": {},
"source": [
"### Investigate thread-noncpu\n",
"Which task prefixes are most responsible for non-CPU time while executing threads?\n",
"Did we forget to tag an I/O task in dask/dask?\n",
"The breakdown below says we didn't - which is a fairly clear indicator of **death by GIL**."
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "e30c8ae5",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>action</th>\n",
" <th>thread-cpu</th>\n",
" <th>thread-noncpu</th>\n",
" <th>noncpu %</th>\n",
" </tr>\n",
" <tr>\n",
" <th>prefix</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>from_pandas</th>\n",
" <td>0.000004</td>\n",
" <td>0.000014</td>\n",
" <td>0.804154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>add</th>\n",
" <td>0.000003</td>\n",
" <td>0.000015</td>\n",
" <td>0.840505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mul</th>\n",
" <td>0.000004</td>\n",
" <td>0.000018</td>\n",
" <td>0.826697</td>\n",
" </tr>\n",
" <tr>\n",
" <th>sub</th>\n",
" <td>0.000006</td>\n",
" <td>0.000020</td>\n",
" <td>0.781973</td>\n",
" </tr>\n",
" <tr>\n",
" <th>split</th>\n",
" <td>0.064455</td>\n",
" <td>0.025895</td>\n",
" <td>0.286608</td>\n",
" </tr>\n",
" <tr>\n",
" <th>repartition-split</th>\n",
" <td>0.006209</td>\n",
" <td>0.027892</td>\n",
" <td>0.817929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>read-parquet</th>\n",
" <td>0.000000</td>\n",
" <td>0.100352</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>getitem</th>\n",
" <td>0.654180</td>\n",
" <td>0.109701</td>\n",
" <td>0.143611</td>\n",
" </tr>\n",
" <tr>\n",
" <th>get-categories-chunk</th>\n",
" <td>1.461580</td>\n",
" <td>0.392220</td>\n",
" <td>0.211576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ge</th>\n",
" <td>1.376355</td>\n",
" <td>1.409568</td>\n",
" <td>0.505961</td>\n",
" </tr>\n",
" <tr>\n",
" <th>isnull</th>\n",
" <td>1.916485</td>\n",
" <td>1.843588</td>\n",
" <td>0.490306</td>\n",
" </tr>\n",
" <tr>\n",
" <th>total_mem_usage</th>\n",
" <td>1.386170</td>\n",
" <td>2.262479</td>\n",
" <td>0.620087</td>\n",
" </tr>\n",
" <tr>\n",
" <th>le</th>\n",
" <td>2.143544</td>\n",
" <td>3.932911</td>\n",
" <td>0.647238</td>\n",
" </tr>\n",
" <tr>\n",
" <th>get-categories-agg</th>\n",
" <td>0.490242</td>\n",
" <td>4.559618</td>\n",
" <td>0.902920</td>\n",
" </tr>\n",
" <tr>\n",
" <th>quantiles</th>\n",
" <td>19.467193</td>\n",
" <td>7.694072</td>\n",
" <td>0.283274</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fillna</th>\n",
" <td>9.035824</td>\n",
" <td>9.276757</td>\n",
" <td>0.506578</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dt-month</th>\n",
" <td>61.667623</td>\n",
" <td>15.566725</td>\n",
" <td>0.201552</td>\n",
" </tr>\n",
" <tr>\n",
" <th>replace</th>\n",
" <td>44.562879</td>\n",
" <td>17.291919</td>\n",
" <td>0.279557</td>\n",
" </tr>\n",
" <tr>\n",
" <th>and</th>\n",
" <td>3.873119</td>\n",
" <td>17.902204</td>\n",
" <td>0.822133</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dt-hour</th>\n",
" <td>61.936668</td>\n",
" <td>19.326843</td>\n",
" <td>0.237829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dt-dayofweek</th>\n",
" <td>73.340177</td>\n",
" <td>20.227170</td>\n",
" <td>0.216178</td>\n",
" </tr>\n",
" <tr>\n",
" <th>repartition</th>\n",
" <td>69.991666</td>\n",
" <td>27.447395</td>\n",
" <td>0.281688</td>\n",
" </tr>\n",
" <tr>\n",
" <th>assign</th>\n",
" <td>85.645224</td>\n",
" <td>50.763689</td>\n",
" <td>0.372143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>categorize_block</th>\n",
" <td>118.911112</td>\n",
" <td>51.995866</td>\n",
" <td>0.304235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dropna</th>\n",
" <td>73.340234</td>\n",
" <td>65.049620</td>\n",
" <td>0.470046</td>\n",
" </tr>\n",
" <tr>\n",
" <th>drop_by_shallow_copy</th>\n",
" <td>146.932939</td>\n",
" <td>67.861184</td>\n",
" <td>0.315936</td>\n",
" </tr>\n",
" <tr>\n",
" <th>str_cat</th>\n",
" <td>678.193138</td>\n",
" <td>97.977001</td>\n",
" <td>0.126231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>loc-series</th>\n",
" <td>91.568525</td>\n",
" <td>137.458958</td>\n",
" <td>0.600185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>rename</th>\n",
" <td>202.831485</td>\n",
" <td>277.758741</td>\n",
" <td>0.577953</td>\n",
" </tr>\n",
" <tr>\n",
" <th>astype</th>\n",
" <td>448.175915</td>\n",
" <td>630.459415</td>\n",
" <td>0.584497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>merge_chunk</th>\n",
" <td>894.067032</td>\n",
" <td>1163.168770</td>\n",
" <td>0.565404</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"action thread-cpu thread-noncpu noncpu %\n",
"prefix \n",
"from_pandas 0.000004 0.000014 0.804154\n",
"add 0.000003 0.000015 0.840505\n",
"mul 0.000004 0.000018 0.826697\n",
"sub 0.000006 0.000020 0.781973\n",
"split 0.064455 0.025895 0.286608\n",
"repartition-split 0.006209 0.027892 0.817929\n",
"read-parquet 0.000000 0.100352 1.000000\n",
"getitem 0.654180 0.109701 0.143611\n",
"get-categories-chunk 1.461580 0.392220 0.211576\n",
"ge 1.376355 1.409568 0.505961\n",
"isnull 1.916485 1.843588 0.490306\n",
"total_mem_usage 1.386170 2.262479 0.620087\n",
"le 2.143544 3.932911 0.647238\n",
"get-categories-agg 0.490242 4.559618 0.902920\n",
"quantiles 19.467193 7.694072 0.283274\n",
"fillna 9.035824 9.276757 0.506578\n",
"dt-month 61.667623 15.566725 0.201552\n",
"replace 44.562879 17.291919 0.279557\n",
"and 3.873119 17.902204 0.822133\n",
"dt-hour 61.936668 19.326843 0.237829\n",
"dt-dayofweek 73.340177 20.227170 0.216178\n",
"repartition 69.991666 27.447395 0.281688\n",
"assign 85.645224 50.763689 0.372143\n",
"categorize_block 118.911112 51.995866 0.304235\n",
"dropna 73.340234 65.049620 0.470046\n",
"drop_by_shallow_copy 146.932939 67.861184 0.315936\n",
"str_cat 678.193138 97.977001 0.126231\n",
"loc-series 91.568525 137.458958 0.600185\n",
"rename 202.831485 277.758741 0.577953\n",
"astype 448.175915 630.459415 0.584497\n",
"merge_chunk 894.067032 1163.168770 0.565404"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cpu_to_noncpu = (\n",
" df2[(df2.action == \"thread-noncpu\") | (df2.action == \"thread-cpu\")]\n",
" .groupby([\"prefix\", \"action\"])\n",
" .sum(numeric_only=True)\n",
" .reset_index()\n",
" .pivot(index=\"prefix\", columns=\"action\", values=\"value\")\n",
")\n",
"cpu_to_noncpu[\"noncpu %\"] = cpu_to_noncpu[\"thread-noncpu\"] / cpu_to_noncpu.sum(axis=1)\n",
"cpu_to_noncpu = cpu_to_noncpu.sort_values(\"thread-noncpu\")\n",
"cpu_to_noncpu"
]
},
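{
"cell_type": "markdown",
"id": "86706085",
"metadata": {},
"source": [
"### Investigate task runtime\n",
"Which task prefixes account for the most execution time (CPU, I/O, and waste combined)? The pie chart below sums every `execute` metric measured in seconds, grouped by task prefix."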
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "7a57fec8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<AxesSubplot: ylabel='value'>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df2[\n",
" (df2.activity == \"execute\") & (df2.unit == \"seconds\")\n",
"].groupby([\"prefix\"]).sum(numeric_only=True).value.sort_values().plot.pie()"
]
},
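{
"cell_type": "markdown",
"id": "4bf7c9bb",
"metadata": {},
"source": [
"### Unspill activity\n",
"How frequently do `execute` and `get-data` have to unspill keys?\n",
"The table below counts, for each activity, the reads served from memory versus from disk, along with the resulting cache hit ratio (memory reads divided by total reads)."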
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "57b4fc31",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>action</th>\n",
" <th>disk-read</th>\n",
" <th>memory-read</th>\n",
" <th>cache hit ratio</th>\n",
" </tr>\n",
" <tr>\n",
" <th>activity</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>execute</th>\n",
" <td>898</td>\n",
" <td>47696</td>\n",
" <td>0.98152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>get-data</th>\n",
" <td>282</td>\n",
" <td>4898</td>\n",
" <td>0.94556</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"action disk-read memory-read cache hit ratio\n",
"activity \n",
"execute 898 47696 0.98152\n",
"get-data 282 4898 0.94556"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cache_hits = df2[\n",
" ((df2.action == \"memory-read\") | (df2.action == \"disk-read\"))\n",
" & (df2.unit == \"count\")\n",
"].groupby([\"activity\", \"action\"]).sum(numeric_only=True).reset_index().pivot(\n",
" index=\"activity\", columns=\"action\", values=\"value\"\n",
").astype(int)\n",
"cache_hits[\"cache hit ratio\"] = cache_hits[\"memory-read\"] / cache_hits.sum(axis=1)\n",
"cache_hits"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ecf6f69",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment