Skip to content

Instantly share code, notes, and snippets.

@pybokeh
Last active October 12, 2023 11:17
Show Gist options
  • Save pybokeh/9fd661dd3c430da2a8dcbb65c8e3d007 to your computer and use it in GitHub Desktop.
Save pybokeh/9fd661dd3c430da2a8dcbb65c8e3d007 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "39d87995-69f3-4e9e-8ecd-1f1a3c1ecdba",
"metadata": {},
"outputs": [],
"source": [
"import ibis\n",
"import ibis.selectors as s\n",
"import pandas as pd\n",
"from ibis import _\n",
"ibis.options.interactive = True\n",
"\n",
"# create a DuckDB client\n",
"client = ibis.duckdb.connect()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "025881ff-f154-49ae-92ca-2bf73dac05e0",
"metadata": {},
"outputs": [],
"source": [
"failures = client.read_csv('data/rivet_failures.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0a29e9ba-6282-48f4-ab08-9b38ed881051",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> serial_number </span>┃<span style=\"font-weight: bold\"> failure_time_minutes </span>┃<span style=\"font-weight: bold\"> failure_mode </span>┃\n",
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n",
"│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │\n",
"├───────────────┼──────────────────────┼────────────────┤\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">90</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">96</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">45</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Lug failed </span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">82</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │\n",
"└───────────────┴──────────────────────┴────────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mserial_number\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_time_minutes\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_mode\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩\n",
"│ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │\n",
"├───────────────┼──────────────────────┼────────────────┤\n",
"│ \u001b[1;36m1\u001b[0m │ \u001b[1;36m90\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n",
"│ \u001b[1;36m2\u001b[0m │ \u001b[1;36m96\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n",
"│ \u001b[1;36m3\u001b[0m │ \u001b[1;36m100\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │\n",
"│ \u001b[1;36m4\u001b[0m │ \u001b[1;36m30\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n",
"│ \u001b[1;36m5\u001b[0m │ \u001b[1;36m49\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n",
"│ \u001b[1;36m6\u001b[0m │ \u001b[1;36m45\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │\n",
"│ \u001b[1;36m7\u001b[0m │ \u001b[1;36m10\u001b[0m │ \u001b[32mLug failed \u001b[0m │\n",
"│ \u001b[1;36m8\u001b[0m │ \u001b[1;36m82\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │\n",
"└───────────────┴──────────────────────┴────────────────┘"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"failures"
]
},
{
"cell_type": "markdown",
"id": "109d7e6b-1c26-4ed2-a9ef-ce5c38601bb9",
"metadata": {},
"source": [
"I need to create or add 4 additional columns:\n",
"\n",
"- Add `status` column to represent status of failure. Valid values are \"FAILED\" or \"SUSPENDED\"\n",
"- Add `rank` column to represent rank of failure unit by failure time sorted in ascending order\n",
"- Add `reverse_rank` column to represent the rank in reverse\n",
"- Add `adjusted_rank` column to represent adjusted rank"
]
},
{
"cell_type": "markdown",
"id": "6fee9bfa-8f75-4f2d-8b74-1ef18114f47b",
"metadata": {},
"source": [
"Where `adjusted_rank` is based on the equation below:\n",
"\n",
"<center>$\\large{Adjusted Rank = \\frac{(Reverse Rank)(Previous AdjustedRank)+(N+1)}{(Reverse Rank)+1}}$</center>\n",
"\n",
"where N equals the number of failures regardless of failure mode."
]
},
{
"cell_type": "markdown",
"id": "411d9391-afc2-4dd0-a5b3-b9bb2ebcf100",
"metadata": {},
"source": [
"Using ibis, I can add the first 3 columns: `status`, `rank`, and `reverse_rank`"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b53a8df2-6ff7-4a34-b6b6-2a3cdb9a73d3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┓\n",
"┃<span style=\"font-weight: bold\"> serial_number </span>┃<span style=\"font-weight: bold\"> failure_time_minutes </span>┃<span style=\"font-weight: bold\"> failure_mode </span>┃<span style=\"font-weight: bold\"> status </span>┃<span style=\"font-weight: bold\"> rank </span>┃<span style=\"font-weight: bold\"> reverse_rank </span>┃\n",
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━┩\n",
"│ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">string</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │ <span style=\"color: #7f7f7f; text-decoration-color: #7f7f7f\">int64</span> │\n",
"├───────────────┼──────────────────────┼────────────────┼───────────┼───────┼──────────────┤\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">10</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Lug failed </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">30</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">45</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">49</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">82</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">5</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">90</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">6</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">96</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair Failure </span> │ <span style=\"color: #008000; text-decoration-color: #008000\">FAILED </span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">7</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">2</span> │\n",
"│ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">100</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">Flair loosened</span> │ <span style=\"color: #008000; text-decoration-color: #008000\">SUSPENDED</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">8</span> │ <span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">1</span> │\n",
"└───────────────┴──────────────────────┴────────────────┴───────────┴───────┴──────────────┘\n",
"</pre>\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mserial_number\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_time_minutes\u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mfailure_mode\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mstatus\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mrank\u001b[0m\u001b[1m \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mreverse_rank\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━┩\n",
"│ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mstring\u001b[0m │ \u001b[2mint64\u001b[0m │ \u001b[2mint64\u001b[0m │\n",
"├───────────────┼──────────────────────┼────────────────┼───────────┼───────┼──────────────┤\n",
"│ \u001b[1;36m7\u001b[0m │ \u001b[1;36m10\u001b[0m │ \u001b[32mLug failed \u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m1\u001b[0m │ \u001b[1;36m8\u001b[0m │\n",
"│ \u001b[1;36m4\u001b[0m │ \u001b[1;36m30\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m2\u001b[0m │ \u001b[1;36m7\u001b[0m │\n",
"│ \u001b[1;36m6\u001b[0m │ \u001b[1;36m45\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m3\u001b[0m │ \u001b[1;36m6\u001b[0m │\n",
"│ \u001b[1;36m5\u001b[0m │ \u001b[1;36m49\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m4\u001b[0m │ \u001b[1;36m5\u001b[0m │\n",
"│ \u001b[1;36m8\u001b[0m │ \u001b[1;36m82\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m5\u001b[0m │ \u001b[1;36m4\u001b[0m │\n",
"│ \u001b[1;36m1\u001b[0m │ \u001b[1;36m90\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m6\u001b[0m │ \u001b[1;36m3\u001b[0m │\n",
"│ \u001b[1;36m2\u001b[0m │ \u001b[1;36m96\u001b[0m │ \u001b[32mFlair Failure \u001b[0m │ \u001b[32mFAILED \u001b[0m │ \u001b[1;36m7\u001b[0m │ \u001b[1;36m2\u001b[0m │\n",
"│ \u001b[1;36m3\u001b[0m │ \u001b[1;36m100\u001b[0m │ \u001b[32mFlair loosened\u001b[0m │ \u001b[32mSUSPENDED\u001b[0m │ \u001b[1;36m8\u001b[0m │ \u001b[1;36m1\u001b[0m │\n",
"└───────────────┴──────────────────────┴────────────────┴───────────┴───────┴──────────────┘"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" failures\n",
" .mutate(\n",
" status=(\n",
" ibis.case()\n",
" .when(_.failure_mode == 'Flair Failure', 'FAILED')\n",
" .else_('SUSPENDED')\n",
" .end()\n",
" )\n",
" )\n",
" .order_by(_.failure_time_minutes)\n",
" .mutate(rank=ibis.row_number()+1)\n",
" .mutate(reverse_rank=failures.count()+1 - _.rank)\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e63218c1-8c15-4625-ae33-6fe649b1d543",
"metadata": {},
"source": [
"But, I don't know how to add the fourth column `adjusted_rank` using ibis. I'm guessing I need to use ibis UDF?"
]
},
{
"cell_type": "markdown",
"id": "e7ff11c7-5f60-431b-80ec-42b112dbaa87",
"metadata": {},
"source": [
"Using pandas, I can add or create `adjusted_rank` column. First, I'll convert my ibis table expression to a pandas dataframe:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "71bf6670-7699-4f14-a8c5-061d8aad2885",
"metadata": {},
"outputs": [],
"source": [
"pdf = (\n",
" failures\n",
" .mutate(\n",
" status=(\n",
" ibis.case()\n",
" .when(_.failure_mode == 'Flair Failure', 'FAILED')\n",
" .else_('SUSPENDED')\n",
" .end()\n",
" )\n",
" )\n",
" .order_by(_.failure_time_minutes)\n",
" .mutate(rank=ibis.row_number()+1)\n",
" .mutate(reverse_rank=failures.count()+1 - _.rank)\n",
").to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3b08a438-c710-41a4-a922-bbb9b666b996",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>serial_number</th>\n",
" <th>failure_time_minutes</th>\n",
" <th>failure_mode</th>\n",
" <th>status</th>\n",
" <th>rank</th>\n",
" <th>reverse_rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" <td>Lug failed</td>\n",
" <td>SUSPENDED</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4</td>\n",
" <td>30</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6</td>\n",
" <td>45</td>\n",
" <td>Flair loosened</td>\n",
" <td>SUSPENDED</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>49</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8</td>\n",
" <td>82</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>90</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2</td>\n",
" <td>96</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3</td>\n",
" <td>100</td>\n",
" <td>Flair loosened</td>\n",
" <td>SUSPENDED</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" serial_number failure_time_minutes failure_mode status rank \\\n",
"0 7 10 Lug failed SUSPENDED 1 \n",
"1 4 30 Flair Failure FAILED 2 \n",
"2 6 45 Flair loosened SUSPENDED 3 \n",
"3 5 49 Flair Failure FAILED 4 \n",
"4 8 82 Flair Failure FAILED 5 \n",
"5 1 90 Flair Failure FAILED 6 \n",
"6 2 96 Flair Failure FAILED 7 \n",
"7 3 100 Flair loosened SUSPENDED 8 \n",
"\n",
" reverse_rank \n",
"0 8 \n",
"1 7 \n",
"2 6 \n",
"3 5 \n",
"4 4 \n",
"5 3 \n",
"6 2 \n",
"7 1 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pdf"
]
},
{
"cell_type": "markdown",
"id": "abe993f1-9233-46bc-8b98-c70496109b04",
"metadata": {},
"source": [
"<center>$\\large{AdjustedRank = \\frac{(Reverse Rank)(Previous AdjustedRank)+(N+1)}{(Reverse Rank)+1}}$</center>"
]
},
{
"cell_type": "markdown",
"id": "b4371a9b-11c0-46bf-bd3d-09b627bccb26",
"metadata": {},
"source": [
"Below is custom function to create or add `adjusted_rank` column using pandas idiom:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "ed0262b3-7491-43d7-a274-c7545fea3b2c",
"metadata": {},
"outputs": [],
"source": [
"def add_adjusted_rank(df: pd.DataFrame, col_status: str, col_rev_rank: str):\n",
" \"\"\"\n",
" Adds adjusted rank column\n",
"\n",
" Parameters\n",
" ----------\n",
" df : pd.DataFrame\n",
" pandas dataframe containing failure data\n",
" col_status: str\n",
" column containing the status of the unit. Must only contain \"FAILED\" or \"SUSPENDED\"\n",
" col_rev_rank : str\n",
" column containing the reverse rank\n",
" \"\"\"\n",
"\n",
" # Previous adjusted rank initialized to zero\n",
" prev_adj_rank = [0]\n",
" \n",
" def adj_rank(series):\n",
" if series[col_status] == \"SUSPENDED\":\n",
" return \"SUSPENSION\"\n",
" else:\n",
" adjusted_rank = (series[col_rev_rank] * 1.0 * prev_adj_rank[0] + (len(df) + 1))/(series[col_rev_rank] + 1)\n",
" # Update previous adjusted rank to the current adjusted rank\n",
" prev_adj_rank[0] = adjusted_rank\n",
" return adjusted_rank\n",
"\n",
" df = df.assign(adjusted_rank=df.apply(adj_rank, axis=1))\n",
"\n",
" return df"
]
},
{
"cell_type": "markdown",
"id": "e00c2ff9-994e-4018-b428-91b89950cc79",
"metadata": {},
"source": [
"Below is what I get using the custom function:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "004ee5f3-8f3f-4346-846d-de58244395e9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>serial_number</th>\n",
" <th>failure_time_minutes</th>\n",
" <th>failure_mode</th>\n",
" <th>status</th>\n",
" <th>rank</th>\n",
" <th>reverse_rank</th>\n",
" <th>adjusted_rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" <td>Lug failed</td>\n",
" <td>SUSPENDED</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>SUSPENSION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4</td>\n",
" <td>30</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1.125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6</td>\n",
" <td>45</td>\n",
" <td>Flair loosened</td>\n",
" <td>SUSPENDED</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>SUSPENSION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>49</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>2.4375</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8</td>\n",
" <td>82</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3.75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>90</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>6</td>\n",
" <td>3</td>\n",
" <td>5.0625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2</td>\n",
" <td>96</td>\n",
" <td>Flair Failure</td>\n",
" <td>FAILED</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>6.375</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>3</td>\n",
" <td>100</td>\n",
" <td>Flair loosened</td>\n",
" <td>SUSPENDED</td>\n",
" <td>8</td>\n",
" <td>1</td>\n",
" <td>SUSPENSION</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" serial_number failure_time_minutes failure_mode status rank \\\n",
"0 7 10 Lug failed SUSPENDED 1 \n",
"1 4 30 Flair Failure FAILED 2 \n",
"2 6 45 Flair loosened SUSPENDED 3 \n",
"3 5 49 Flair Failure FAILED 4 \n",
"4 8 82 Flair Failure FAILED 5 \n",
"5 1 90 Flair Failure FAILED 6 \n",
"6 2 96 Flair Failure FAILED 7 \n",
"7 3 100 Flair loosened SUSPENDED 8 \n",
"\n",
" reverse_rank adjusted_rank \n",
"0 8 SUSPENSION \n",
"1 7 1.125 \n",
"2 6 SUSPENSION \n",
"3 5 2.4375 \n",
"4 4 3.75 \n",
"5 3 5.0625 \n",
"6 2 6.375 \n",
"7 1 SUSPENSION "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"add_adjusted_rank(pdf, 'status', 'reverse_rank')"
]
},
{
"cell_type": "markdown",
"id": "248e9fca-febe-4287-80e5-514e6b5a672b",
"metadata": {},
"source": [
"I need to be able to accomplish creating this `adjusted_rank` column using ibis. I'm assuming perhaps I need to look into using ibis' UDF. I looked at the [documentation](https://ibis-project.org/reference/scalar-udfs) for UDFs, but I'm still not sure how to use ibis' UDF using duckdb backend to create this `adjusted_rank` column."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Py3.11 (eda_dev)",
"language": "python",
"name": "eda_dev"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
@cpcloud
Copy link

cpcloud commented Oct 12, 2023

Here's a version that uses an Ibis UDF:

import pyarrow as pa

import ibis
from ibis import _, udf

ibis.options.interactive = True

client = ibis.duckdb.connect()
failures = client.read_csv("/data/rivet_failures.csv")


@udf.scalar.pyarrow
def adjusted_rank(n: int, col_status: str, col_rev_rank: int) -> float:
    # Previous adjusted rank initialized to zero
    prev_adj_rank = [0]

    def adj_rank(n, status, rev_rank):
        if status == "SUSPENDED":
            return None
        else:
            adjusted_rank = (rev_rank * prev_adj_rank[0] + (n + 1)) / (rev_rank + 1)
            # Update previous adjusted rank to the current adjusted rank
            prev_adj_rank[0] = adjusted_rank
            return adjusted_rank

    return pa.array(
        map(adj_rank, n.to_numpy(), col_status.to_numpy(), col_rev_rank.to_numpy())
    )


ranks = (
    failures.mutate(
        status=(
            ibis.case()
            .when(_.failure_mode == "Flair Failure", "FAILED")
            .else_("SUSPENDED")
            .end()
        )
    )
    .order_by(_.failure_time_minutes)
    .mutate(rank=ibis.row_number() + 1)
    .mutate(reverse_rank=failures.count() + 1 - _.rank)
    .mutate(adjusted_rank=adjusted_rank(_.count(), _.status, _.reverse_rank))
)

print(ranks)

which gives

┏━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ serial_number ┃ failure_time_minutes ┃ failure_mode   ┃ status    ┃ rank  ┃ reverse_rank ┃ adjusted_rank ┃
┡━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ int64         │ int64                │ string         │ string    │ int64 │ int64        │ float64       │
├───────────────┼──────────────────────┼────────────────┼───────────┼───────┼──────────────┼───────────────┤
│             7 │                   10 │ Lug failed     │ SUSPENDED │     1 │            8 │           nan │
│             4 │                   30 │ Flair Failure  │ FAILED    │     2 │            7 │        1.1250 │
│             6 │                   45 │ Flair loosened │ SUSPENDED │     3 │            6 │           nan │
│             5 │                   49 │ Flair Failure  │ FAILED    │     4 │            5 │        2.4375 │
│             8 │                   82 │ Flair Failure  │ FAILED    │     5 │            4 │        3.7500 │
│             1 │                   90 │ Flair Failure  │ FAILED    │     6 │            3 │        5.0625 │
│             2 │                   96 │ Flair Failure  │ FAILED    │     7 │            2 │        6.3750 │
│             3 │                  100 │ Flair loosened │ SUSPENDED │     8 │            1 │           nan │
└───────────────┴──────────────────────┴────────────────┴───────────┴───────┴──────────────┴───────────────┘

The main differences are:

  1. DuckDB UDFs only support PyArrow input and output, so my implementation is using PyArrow APIs.
  2. You can't mix types like strings and floats in the same column in Ibis (and most of our backends do not support this) so I replaced the string you have here in the adjusted_rank column with None (which pandas converts into a NaN).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment