Skip to content

Instantly share code, notes, and snippets.

@hsm207
Created May 7, 2020 16:16
Show Gist options
  • Save hsm207/eecce9c744b61d2921db52b78ed20f63 to your computer and use it in GitHub Desktop.
Save hsm207/eecce9c744b61d2921db52b78ed20f63 to your computer and use it in GitHub Desktop.
multi condition column in pandas
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# System Info"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Architecture: x86_64\r\n",
"CPU op-mode(s): 32-bit, 64-bit\r\n",
"Byte Order: Little Endian\r\n",
"CPU(s): 16\r\n",
"On-line CPU(s) list: 0-15\r\n",
"Thread(s) per core: 2\r\n",
"Core(s) per socket: 8\r\n",
"Socket(s): 1\r\n",
"NUMA node(s): 1\r\n",
"Vendor ID: GenuineIntel\r\n",
"CPU family: 6\r\n",
"Model: 85\r\n",
"Model name: Intel(R) Xeon(R) Platinum 8124M CPU @ 3.00GHz\r\n",
"Stepping: 4\r\n",
"CPU MHz: 3449.734\r\n",
"BogoMIPS: 6000.00\r\n",
"Hypervisor vendor: KVM\r\n",
"Virtualization type: full\r\n",
"L1d cache: 32K\r\n",
"L1i cache: 32K\r\n",
"L2 cache: 1024K\r\n",
"L3 cache: 25344K\r\n",
"NUMA node0 CPU(s): 0-15\r\n",
"Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke\r\n"
]
}
],
"source": [
"!lscpu"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" total used free shared buff/cache available\r\n",
"Mem: 30G 4.4G 24G 888K 2.0G 25G\r\n",
"Swap: 0B 0B 0B\r\n"
]
}
],
"source": [
"!free -h"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'1.0.3'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Small Example"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>foo</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bar</td>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>baz</td>\n",
" <td>3</td>\n",
" <td>30</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C\n",
"0 foo 1 10\n",
"1 bar 2 20\n",
"2 baz 3 30"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Three-row toy frame: a label column A plus two integer columns B and C.\n",
"df = pd.DataFrame(\n",
"    dict(\n",
"        A=[\"foo\", \"bar\", \"baz\"],\n",
"        B=[1, 2, 3],\n",
"        C=[10, 20, 30],\n",
"    )\n",
")\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>foo</td>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>bar</td>\n",
" <td>2</td>\n",
" <td>20</td>\n",
" <td>20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>baz</td>\n",
" <td>3</td>\n",
" <td>30</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"0 foo 1 10 1\n",
"1 bar 2 20 20\n",
"2 baz 3 30 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def f(a, b, c):\n",
"    \"\"\"Pick the value of column D for one row.\n",
"\n",
"    Returns ``b`` when ``a == 'foo'``, ``c`` when ``a == 'bar'``, and an\n",
"    empty string for every other value of ``a``.\n",
"    \"\"\"\n",
"    if a == \"foo\":\n",
"        return b\n",
"    elif a == \"bar\":\n",
"        return c\n",
"    else:\n",
"        return \"\"\n",
"\n",
"\n",
"# Select A, B and C by name so the three-way unpacking below still works when\n",
"# this cell is re-run after column D has already been added to df.\n",
"row_iter = df[[\"A\", \"B\", \"C\"]].itertuples(index=False, name=None)\n",
"\n",
"df[\"D\"] = [f(a, b, c) for a, b, c in row_iter]\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Big Example"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"N = 100000000  # number of rows in the benchmark frame (100 million)\n",
"\n",
"# Seed NumPy's global generator so the random columns are identical on every\n",
"# Restart & Run All; without it the frame differs from run to run.\n",
"np.random.seed(0)\n",
"\n",
"A = np.random.choice([\"foo\", \"bar\", \"baz\"], N)\n",
"B = np.random.rand(N)\n",
"C = np.random.rand(N)\n",
"\n",
"big_df = pd.DataFrame(\n",
"    {\n",
"        \"A\": A,\n",
"        \"B\": B,\n",
"        \"C\": C\n",
"    }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 42.1 s, sys: 1.33 s, total: 43.4 s\n",
"Wall time: 43.4 s\n"
]
}
],
"source": [
"%%time\n",
"# Iterate over A, B and C only, selected by name: once D exists, iterating the\n",
"# whole frame yields 4-tuples and the three-way unpacking below would raise\n",
"# ValueError when this cell is run a second time.\n",
"row_iter = big_df[[\"A\", \"B\", \"C\"]].itertuples(index=False, name=None)\n",
"\n",
"big_df[\"D\"] = [f(a, b, c) for a, b, c in row_iter]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" <th>B</th>\n",
" <th>C</th>\n",
" <th>D</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>baz</td>\n",
" <td>0.899798</td>\n",
" <td>0.149934</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>baz</td>\n",
" <td>0.299621</td>\n",
" <td>0.960305</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>bar</td>\n",
" <td>0.849153</td>\n",
" <td>0.796153</td>\n",
" <td>0.796153</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>baz</td>\n",
" <td>0.722420</td>\n",
" <td>0.384434</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>foo</td>\n",
" <td>0.319965</td>\n",
" <td>0.891520</td>\n",
" <td>0.319965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>foo</td>\n",
" <td>0.486911</td>\n",
" <td>0.199236</td>\n",
" <td>0.486911</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>baz</td>\n",
" <td>0.239329</td>\n",
" <td>0.264694</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>bar</td>\n",
" <td>0.042145</td>\n",
" <td>0.079093</td>\n",
" <td>0.0790929</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>foo</td>\n",
" <td>0.789644</td>\n",
" <td>0.507562</td>\n",
" <td>0.789644</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>bar</td>\n",
" <td>0.066720</td>\n",
" <td>0.660952</td>\n",
" <td>0.660952</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>bar</td>\n",
" <td>0.729926</td>\n",
" <td>0.673034</td>\n",
" <td>0.673034</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>baz</td>\n",
" <td>0.525036</td>\n",
" <td>0.648815</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>baz</td>\n",
" <td>0.711192</td>\n",
" <td>0.162742</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>bar</td>\n",
" <td>0.683820</td>\n",
" <td>0.493763</td>\n",
" <td>0.493763</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>bar</td>\n",
" <td>0.449215</td>\n",
" <td>0.397865</td>\n",
" <td>0.397865</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>baz</td>\n",
" <td>0.376035</td>\n",
" <td>0.636489</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>baz</td>\n",
" <td>0.740230</td>\n",
" <td>0.336693</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>foo</td>\n",
" <td>0.618343</td>\n",
" <td>0.673749</td>\n",
" <td>0.618343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>bar</td>\n",
" <td>0.099526</td>\n",
" <td>0.166923</td>\n",
" <td>0.166923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>baz</td>\n",
" <td>0.894621</td>\n",
" <td>0.869187</td>\n",
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" A B C D\n",
"0 baz 0.899798 0.149934 \n",
"1 baz 0.299621 0.960305 \n",
"2 bar 0.849153 0.796153 0.796153\n",
"3 baz 0.722420 0.384434 \n",
"4 foo 0.319965 0.891520 0.319965\n",
"5 foo 0.486911 0.199236 0.486911\n",
"6 baz 0.239329 0.264694 \n",
"7 bar 0.042145 0.079093 0.0790929\n",
"8 foo 0.789644 0.507562 0.789644\n",
"9 bar 0.066720 0.660952 0.660952\n",
"10 bar 0.729926 0.673034 0.673034\n",
"11 baz 0.525036 0.648815 \n",
"12 baz 0.711192 0.162742 \n",
"13 bar 0.683820 0.493763 0.493763\n",
"14 bar 0.449215 0.397865 0.397865\n",
"15 baz 0.376035 0.636489 \n",
"16 baz 0.740230 0.336693 \n",
"17 foo 0.618343 0.673749 0.618343\n",
"18 bar 0.099526 0.166923 0.166923\n",
"19 baz 0.894621 0.869187 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"big_df.head(20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Environment (conda_anaconda3)",
"language": "python",
"name": "conda_anaconda3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment