Skip to content

Instantly share code, notes, and snippets.

@MaxHalford
Last active December 14, 2023 08:59
Show Gist options
  • Save MaxHalford/9fba0c2d6800d0f0643902bf57b99780 to your computer and use it in GitHub Desktop.
Save MaxHalford/9fba0c2d6800d0f0643902bf57b99780 to your computer and use it in GitHub Desktop.
Decomposition without and with gaps
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Funnel decomposition"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Without gaps"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>150</td>\n",
" <td>120</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>10750.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>120</td>\n",
" <td>160</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>200</td>\n",
" <td>145</td>\n",
" <td>8739.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>400</td>\n",
" <td>166</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>300</td>\n",
" <td>150</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500</td>\n",
" <td>1000</td>\n",
" <td>500</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>323</td>\n",
" <td>145</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>320</td>\n",
" <td>166</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month\n",
"0 2018-01-01 A 1000 150 120 8600.0 01\n",
"1 2018-01-01 B 2000 150 150 9400.0 01\n",
"2 2018-01-01 C 2500 250 125 10750.0 01\n",
"3 2019-01-01 A 1000 120 160 9055.0 01\n",
"4 2019-01-01 B 2150 200 145 8739.0 01\n",
"5 2019-01-01 C 2000 400 166 10147.0 01\n",
"6 2018-02-01 A 50 20 10 500.0 02\n",
"7 2018-02-01 B 2000 300 150 11400.0 02\n",
"8 2018-02-01 C 2500 250 125 8750.0 02\n",
"9 2019-02-01 A 2500 1000 500 50000.0 02\n",
"10 2019-02-01 B 2150 323 145 10739.0 02\n",
"11 2019-02-01 C 2000 320 166 12147.0 02"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({\n",
" 'timestamp': ['2018-01-01', '2018-01-01', '2018-01-01', '2019-01-01', '2019-01-01', '2019-01-01', '2018-02-01', '2018-02-01', '2018-02-01', '2019-02-01', '2019-02-01', '2019-02-01'],\n",
" 'dim': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],\n",
" 'impressions': [1000, 2000, 2500, 1000, 2150, 2000, 50, 2000, 2500, 2500, 2150, 2000],\n",
" 'clicks': [150, 150, 250, 120, 200, 400, 20, 300, 250, 1000, 323, 320],\n",
" 'conversions': [120, 150, 125, 160, 145, 166, 10, 150, 125, 500, 145, 166],\n",
" 'revenue': ['$8,600', '$9,400', '$10,750', '$9,055', '$8,739', '$10,147', '$500', '$11,400', '$8,750', '$50,000', '$10,739', '$12,147'],\n",
"})\n",
"df['month'] = pd.to_datetime(df['timestamp']).dt.strftime('%m')\n",
"# Convert the 'revenue' column to a numeric type (removing the dollar sign and commas)\n",
"df['revenue'] = df['revenue'].str.replace('$', '', regex=False).str.replace(',', '', regex=False).astype(float)\n",
"df\n"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>revenue</th>\n",
" </tr>\n",
" <tr>\n",
" <th>year</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>51427.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" revenue\n",
"year \n",
"2018 NaN\n",
"2019 51427.0"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" df.assign(year=lambda x: pd.to_datetime(x['timestamp']).dt.year)\n",
" .groupby('year')\n",
" .agg({'revenue': 'sum'})\n",
" .diff()\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"funnel = ['impressions', 'clicks', 'conversions', 'revenue']\n",
"dimensions = ['month', 'dim']\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['impressions',\n",
" 'clicks_over_impressions',\n",
" 'conversions_over_clicks',\n",
" 'revenue_over_conversions']"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rate_names = [\n",
" f'{num}_over_{den}' if den else num\n",
" for den, num in [(None, funnel[0]), *zip(funnel, funnel[1:])]\n",
"]\n",
"rate_names\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Left-to-right"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>150</td>\n",
" <td>120</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>10750.0</td>\n",
" <td>01</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>86.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>120</td>\n",
" <td>160</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>200</td>\n",
" <td>145</td>\n",
" <td>8739.0</td>\n",
" <td>01</td>\n",
" <td>0.093023</td>\n",
" <td>0.725000</td>\n",
" <td>60.268966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>400</td>\n",
" <td>166</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>300</td>\n",
" <td>150</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500</td>\n",
" <td>1000</td>\n",
" <td>500</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>323</td>\n",
" <td>145</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>320</td>\n",
" <td>166</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month \\\n",
"0 2018-01-01 A 1000 150 120 8600.0 01 \n",
"1 2018-01-01 B 2000 150 150 9400.0 01 \n",
"2 2018-01-01 C 2500 250 125 10750.0 01 \n",
"3 2019-01-01 A 1000 120 160 9055.0 01 \n",
"4 2019-01-01 B 2150 200 145 8739.0 01 \n",
"5 2019-01-01 C 2000 400 166 10147.0 01 \n",
"6 2018-02-01 A 50 20 10 500.0 02 \n",
"7 2018-02-01 B 2000 300 150 11400.0 02 \n",
"8 2018-02-01 C 2500 250 125 8750.0 02 \n",
"9 2019-02-01 A 2500 1000 500 50000.0 02 \n",
"10 2019-02-01 B 2150 323 145 10739.0 02 \n",
"11 2019-02-01 C 2000 320 166 12147.0 02 \n",
"\n",
" clicks_over_impressions conversions_over_clicks revenue_over_conversions \n",
"0 0.150000 0.800000 71.666667 \n",
"1 0.075000 1.000000 62.666667 \n",
"2 0.100000 0.500000 86.000000 \n",
"3 0.120000 1.333333 56.593750 \n",
"4 0.093023 0.725000 60.268966 \n",
"5 0.200000 0.415000 61.126506 \n",
"6 0.400000 0.500000 50.000000 \n",
"7 0.150000 0.500000 76.000000 \n",
"8 0.100000 0.500000 70.000000 \n",
"9 0.400000 0.500000 100.000000 \n",
"10 0.150233 0.448916 74.062069 \n",
"11 0.160000 0.518750 73.174699 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"decomp = df.copy()\n",
"for den, num in zip(funnel, funnel[1:]):\n",
" decomp[f'{num}_over_{den}'] = df[num] / df[den]\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" <th>impressions_lag</th>\n",
" <th>clicks_over_impressions_lag</th>\n",
" <th>conversions_over_clicks_lag</th>\n",
" <th>revenue_over_conversions_lag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>150</td>\n",
" <td>120</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>10750.0</td>\n",
" <td>01</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>86.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>120</td>\n",
" <td>160</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" <td>1000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.8</td>\n",
" <td>71.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>200</td>\n",
" <td>145</td>\n",
" <td>8739.0</td>\n",
" <td>01</td>\n",
" <td>0.093023</td>\n",
" <td>0.725000</td>\n",
" <td>60.268966</td>\n",
" <td>2000.0</td>\n",
" <td>0.075</td>\n",
" <td>1.0</td>\n",
" <td>62.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>400</td>\n",
" <td>166</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>86.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>300</td>\n",
" <td>150</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500</td>\n",
" <td>1000</td>\n",
" <td>500</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" <td>50.0</td>\n",
" <td>0.400</td>\n",
" <td>0.5</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>323</td>\n",
" <td>145</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" <td>2000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.5</td>\n",
" <td>76.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>320</td>\n",
" <td>166</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>70.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month \\\n",
"0 2018-01-01 A 1000 150 120 8600.0 01 \n",
"1 2018-01-01 B 2000 150 150 9400.0 01 \n",
"2 2018-01-01 C 2500 250 125 10750.0 01 \n",
"3 2019-01-01 A 1000 120 160 9055.0 01 \n",
"4 2019-01-01 B 2150 200 145 8739.0 01 \n",
"5 2019-01-01 C 2000 400 166 10147.0 01 \n",
"6 2018-02-01 A 50 20 10 500.0 02 \n",
"7 2018-02-01 B 2000 300 150 11400.0 02 \n",
"8 2018-02-01 C 2500 250 125 8750.0 02 \n",
"9 2019-02-01 A 2500 1000 500 50000.0 02 \n",
"10 2019-02-01 B 2150 323 145 10739.0 02 \n",
"11 2019-02-01 C 2000 320 166 12147.0 02 \n",
"\n",
" clicks_over_impressions conversions_over_clicks \\\n",
"0 0.150000 0.800000 \n",
"1 0.075000 1.000000 \n",
"2 0.100000 0.500000 \n",
"3 0.120000 1.333333 \n",
"4 0.093023 0.725000 \n",
"5 0.200000 0.415000 \n",
"6 0.400000 0.500000 \n",
"7 0.150000 0.500000 \n",
"8 0.100000 0.500000 \n",
"9 0.400000 0.500000 \n",
"10 0.150233 0.448916 \n",
"11 0.160000 0.518750 \n",
"\n",
" revenue_over_conversions impressions_lag clicks_over_impressions_lag \\\n",
"0 71.666667 NaN NaN \n",
"1 62.666667 NaN NaN \n",
"2 86.000000 NaN NaN \n",
"3 56.593750 1000.0 0.150 \n",
"4 60.268966 2000.0 0.075 \n",
"5 61.126506 2500.0 0.100 \n",
"6 50.000000 NaN NaN \n",
"7 76.000000 NaN NaN \n",
"8 70.000000 NaN NaN \n",
"9 100.000000 50.0 0.400 \n",
"10 74.062069 2000.0 0.150 \n",
"11 73.174699 2500.0 0.100 \n",
"\n",
" conversions_over_clicks_lag revenue_over_conversions_lag \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 0.8 71.666667 \n",
"4 1.0 62.666667 \n",
"5 0.5 86.000000 \n",
"6 NaN NaN \n",
"7 NaN NaN \n",
"8 NaN NaN \n",
"9 0.5 50.000000 \n",
"10 0.5 76.000000 \n",
"11 0.5 70.000000 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for rate_name in rate_names:\n",
" decomp[f'{rate_name}_lag'] = decomp.groupby(dimensions)[rate_name].shift(1)\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" <th>impressions_lag</th>\n",
" <th>clicks_over_impressions_lag</th>\n",
" <th>conversions_over_clicks_lag</th>\n",
" <th>revenue_over_conversions_lag</th>\n",
" <th>impressions_contribution</th>\n",
" <th>clicks_over_impressions_contribution</th>\n",
" <th>conversions_over_clicks_contribution</th>\n",
" <th>revenue_over_conversions_contribution</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>150</td>\n",
" <td>120</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>10750.0</td>\n",
" <td>01</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>86.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>120</td>\n",
" <td>160</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" <td>1000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.8</td>\n",
" <td>71.666667</td>\n",
" <td>0.0</td>\n",
" <td>-1720.000000</td>\n",
" <td>4586.666667</td>\n",
" <td>-2411.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>200</td>\n",
" <td>145</td>\n",
" <td>8739.0</td>\n",
" <td>01</td>\n",
" <td>0.093023</td>\n",
" <td>0.725000</td>\n",
" <td>60.268966</td>\n",
" <td>2000.0</td>\n",
" <td>0.075</td>\n",
" <td>1.0</td>\n",
" <td>62.666667</td>\n",
" <td>705.0</td>\n",
" <td>2428.333333</td>\n",
" <td>-3446.666667</td>\n",
" <td>-347.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>400</td>\n",
" <td>166</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>86.000000</td>\n",
" <td>-2150.0</td>\n",
" <td>8600.000000</td>\n",
" <td>-2924.000000</td>\n",
" <td>-4129.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>300</td>\n",
" <td>150</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500</td>\n",
" <td>1000</td>\n",
" <td>500</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" <td>50.0</td>\n",
" <td>0.400</td>\n",
" <td>0.5</td>\n",
" <td>50.000000</td>\n",
" <td>24500.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>25000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>323</td>\n",
" <td>145</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" <td>2000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.5</td>\n",
" <td>76.000000</td>\n",
" <td>855.0</td>\n",
" <td>19.000000</td>\n",
" <td>-1254.000000</td>\n",
" <td>-281.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>320</td>\n",
" <td>166</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>70.000000</td>\n",
" <td>-1750.0</td>\n",
" <td>4200.000000</td>\n",
" <td>420.000000</td>\n",
" <td>527.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month \\\n",
"0 2018-01-01 A 1000 150 120 8600.0 01 \n",
"1 2018-01-01 B 2000 150 150 9400.0 01 \n",
"2 2018-01-01 C 2500 250 125 10750.0 01 \n",
"3 2019-01-01 A 1000 120 160 9055.0 01 \n",
"4 2019-01-01 B 2150 200 145 8739.0 01 \n",
"5 2019-01-01 C 2000 400 166 10147.0 01 \n",
"6 2018-02-01 A 50 20 10 500.0 02 \n",
"7 2018-02-01 B 2000 300 150 11400.0 02 \n",
"8 2018-02-01 C 2500 250 125 8750.0 02 \n",
"9 2019-02-01 A 2500 1000 500 50000.0 02 \n",
"10 2019-02-01 B 2150 323 145 10739.0 02 \n",
"11 2019-02-01 C 2000 320 166 12147.0 02 \n",
"\n",
" clicks_over_impressions conversions_over_clicks \\\n",
"0 0.150000 0.800000 \n",
"1 0.075000 1.000000 \n",
"2 0.100000 0.500000 \n",
"3 0.120000 1.333333 \n",
"4 0.093023 0.725000 \n",
"5 0.200000 0.415000 \n",
"6 0.400000 0.500000 \n",
"7 0.150000 0.500000 \n",
"8 0.100000 0.500000 \n",
"9 0.400000 0.500000 \n",
"10 0.150233 0.448916 \n",
"11 0.160000 0.518750 \n",
"\n",
" revenue_over_conversions impressions_lag clicks_over_impressions_lag \\\n",
"0 71.666667 NaN NaN \n",
"1 62.666667 NaN NaN \n",
"2 86.000000 NaN NaN \n",
"3 56.593750 1000.0 0.150 \n",
"4 60.268966 2000.0 0.075 \n",
"5 61.126506 2500.0 0.100 \n",
"6 50.000000 NaN NaN \n",
"7 76.000000 NaN NaN \n",
"8 70.000000 NaN NaN \n",
"9 100.000000 50.0 0.400 \n",
"10 74.062069 2000.0 0.150 \n",
"11 73.174699 2500.0 0.100 \n",
"\n",
" conversions_over_clicks_lag revenue_over_conversions_lag \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 0.8 71.666667 \n",
"4 1.0 62.666667 \n",
"5 0.5 86.000000 \n",
"6 NaN NaN \n",
"7 NaN NaN \n",
"8 NaN NaN \n",
"9 0.5 50.000000 \n",
"10 0.5 76.000000 \n",
"11 0.5 70.000000 \n",
"\n",
" impressions_contribution clicks_over_impressions_contribution \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 0.0 -1720.000000 \n",
"4 705.0 2428.333333 \n",
"5 -2150.0 8600.000000 \n",
"6 NaN NaN \n",
"7 NaN NaN \n",
"8 NaN NaN \n",
"9 24500.0 0.000000 \n",
"10 855.0 19.000000 \n",
"11 -1750.0 4200.000000 \n",
"\n",
" conversions_over_clicks_contribution \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 4586.666667 \n",
"4 -3446.666667 \n",
"5 -2924.000000 \n",
"6 NaN \n",
"7 NaN \n",
"8 NaN \n",
"9 0.000000 \n",
"10 -1254.000000 \n",
"11 420.000000 \n",
"\n",
" revenue_over_conversions_contribution \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 -2411.666667 \n",
"4 -347.666667 \n",
"5 -4129.000000 \n",
"6 NaN \n",
"7 NaN \n",
"8 NaN \n",
"9 25000.000000 \n",
"10 -281.000000 \n",
"11 527.000000 "
]
},
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for i, _ in enumerate(rate_names):\n",
"\n",
" before = ' * '.join(rate_names[:i])\n",
" current = f'({rate_names[i]} - {rate_names[i]}_lag)'\n",
" after = ' * '.join(f'{x}_lag' for x in rate_names[i+1:])\n",
" formula = ' * '.join(filter(None, [before, current, after]))\n",
"\n",
" decomp[f'{rate_names[i]}_contribution'] = decomp.eval(formula)\n",
"\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2018 0.0\n",
"2019 51427.0\n",
"dtype: float64"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"contribution_columns = [x for x in decomp.columns if x.endswith('_contribution')]\n",
"(\n",
" decomp\n",
" .assign(year=lambda x: pd.to_datetime(x['timestamp']).dt.year)\n",
" .groupby('year')[contribution_columns].sum().sum(axis=1)\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## With gaps"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>150</td>\n",
" <td>120</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>150</td>\n",
" <td>150</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000</td>\n",
" <td>120</td>\n",
" <td>160</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>400</td>\n",
" <td>166</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50</td>\n",
" <td>20</td>\n",
" <td>10</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000</td>\n",
" <td>300</td>\n",
" <td>150</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500</td>\n",
" <td>250</td>\n",
" <td>125</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500</td>\n",
" <td>1000</td>\n",
" <td>500</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150</td>\n",
" <td>323</td>\n",
" <td>145</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000</td>\n",
" <td>320</td>\n",
" <td>166</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month\n",
"0 2018-01-01 A 1000 150 120 8600.0 01\n",
"1 2018-01-01 B 2000 150 150 9400.0 01\n",
"3 2019-01-01 A 1000 120 160 9055.0 01\n",
"5 2019-01-01 C 2000 400 166 10147.0 01\n",
"6 2018-02-01 A 50 20 10 500.0 02\n",
"7 2018-02-01 B 2000 300 150 11400.0 02\n",
"8 2018-02-01 C 2500 250 125 8750.0 02\n",
"9 2019-02-01 A 2500 1000 500 50000.0 02\n",
"10 2019-02-01 B 2150 323 145 10739.0 02\n",
"11 2019-02-01 C 2000 320 166 12147.0 02"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_with_gaps = df.copy()\n",
"df_with_gaps = df_with_gaps.drop(2)\n",
"df_with_gaps = df_with_gaps.drop(4)\n",
"df_with_gaps\n"
]
},
{
"cell_type": "code",
"execution_count": 140,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>revenue</th>\n",
" </tr>\n",
" <tr>\n",
" <th>year</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>53438.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" revenue\n",
"year \n",
"2018 NaN\n",
"2019 53438.0"
]
},
"execution_count": 140,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" df_with_gaps.assign(year=lambda x: pd.to_datetime(x['timestamp']).dt.year)\n",
" .groupby('year')\n",
" .agg({'revenue': 'sum'})\n",
" .diff()\n",
")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The trick is to artificially insert the dimension values that aren't there, and fill in the missing values with 0s."
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>150.0</td>\n",
" <td>120.0</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>150.0</td>\n",
" <td>150.0</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>120.0</td>\n",
" <td>160.0</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>400.0</td>\n",
" <td>166.0</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50.0</td>\n",
" <td>20.0</td>\n",
" <td>10.0</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>300.0</td>\n",
" <td>150.0</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500.0</td>\n",
" <td>250.0</td>\n",
" <td>125.0</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500.0</td>\n",
" <td>1000.0</td>\n",
" <td>500.0</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150.0</td>\n",
" <td>323.0</td>\n",
" <td>145.0</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>320.0</td>\n",
" <td>166.0</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month year\n",
"0 2018-01-01 A 1000.0 150.0 120.0 8600.0 01 2018\n",
"4 2018-01-01 B 2000.0 150.0 150.0 9400.0 01 2018\n",
"8 2018-01-01 C 0.0 0.0 0.0 0.0 01 2018\n",
"1 2019-01-01 A 1000.0 120.0 160.0 9055.0 01 2019\n",
"5 2019-01-01 B 0.0 0.0 0.0 0.0 01 2019\n",
"9 2019-01-01 C 2000.0 400.0 166.0 10147.0 01 2019\n",
"2 2018-02-01 A 50.0 20.0 10.0 500.0 02 2018\n",
"6 2018-02-01 B 2000.0 300.0 150.0 11400.0 02 2018\n",
"10 2018-02-01 C 2500.0 250.0 125.0 8750.0 02 2018\n",
"3 2019-02-01 A 2500.0 1000.0 500.0 50000.0 02 2019\n",
"7 2019-02-01 B 2150.0 323.0 145.0 10739.0 02 2019\n",
"11 2019-02-01 C 2000.0 320.0 166.0 12147.0 02 2019"
]
},
"execution_count": 146,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dimension_values = [\n",
" pd.Series(df_with_gaps[dim].unique(), name=dim).to_frame()\n",
" for dim in ['dim', 'timestamp']\n",
"]\n",
"product = dimension_values[0].merge(dimension_values[1], how='cross')\n",
"df_with_gaps = pd.merge(\n",
" df_with_gaps,\n",
" product,\n",
" left_on=product.columns.tolist(),\n",
" right_on=product.columns.tolist(),\n",
" how='right'\n",
")\n",
"df_with_gaps = (\n",
" df_with_gaps\n",
" .assign(\n",
" month=pd.to_datetime(df_with_gaps['timestamp']).dt.strftime('%m'),\n",
" year=pd.to_datetime(df_with_gaps['timestamp']).dt.strftime('%Y')\n",
" )\n",
" .sort_values(['month', 'year', 'dim'])\n",
")\n",
"\n",
"for step in funnel:\n",
" df_with_gaps[step] = df_with_gaps[step].fillna(0)\n",
"\n",
"df_with_gaps\n"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>150.0</td>\n",
" <td>120.0</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>150.0</td>\n",
" <td>150.0</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>120.0</td>\n",
" <td>160.0</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>400.0</td>\n",
" <td>166.0</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50.0</td>\n",
" <td>20.0</td>\n",
" <td>10.0</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>300.0</td>\n",
" <td>150.0</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500.0</td>\n",
" <td>250.0</td>\n",
" <td>125.0</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500.0</td>\n",
" <td>1000.0</td>\n",
" <td>500.0</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150.0</td>\n",
" <td>323.0</td>\n",
" <td>145.0</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>320.0</td>\n",
" <td>166.0</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month year \\\n",
"0 2018-01-01 A 1000.0 150.0 120.0 8600.0 01 2018 \n",
"4 2018-01-01 B 2000.0 150.0 150.0 9400.0 01 2018 \n",
"8 2018-01-01 C 0.0 0.0 0.0 0.0 01 2018 \n",
"1 2019-01-01 A 1000.0 120.0 160.0 9055.0 01 2019 \n",
"5 2019-01-01 B 0.0 0.0 0.0 0.0 01 2019 \n",
"9 2019-01-01 C 2000.0 400.0 166.0 10147.0 01 2019 \n",
"2 2018-02-01 A 50.0 20.0 10.0 500.0 02 2018 \n",
"6 2018-02-01 B 2000.0 300.0 150.0 11400.0 02 2018 \n",
"10 2018-02-01 C 2500.0 250.0 125.0 8750.0 02 2018 \n",
"3 2019-02-01 A 2500.0 1000.0 500.0 50000.0 02 2019 \n",
"7 2019-02-01 B 2150.0 323.0 145.0 10739.0 02 2019 \n",
"11 2019-02-01 C 2000.0 320.0 166.0 12147.0 02 2019 \n",
"\n",
" clicks_over_impressions conversions_over_clicks revenue_over_conversions \n",
"0 0.150000 0.800000 71.666667 \n",
"4 0.075000 1.000000 62.666667 \n",
"8 0.000000 0.000000 0.000000 \n",
"1 0.120000 1.333333 56.593750 \n",
"5 0.000000 0.000000 0.000000 \n",
"9 0.200000 0.415000 61.126506 \n",
"2 0.400000 0.500000 50.000000 \n",
"6 0.150000 0.500000 76.000000 \n",
"10 0.100000 0.500000 70.000000 \n",
"3 0.400000 0.500000 100.000000 \n",
"7 0.150233 0.448916 74.062069 \n",
"11 0.160000 0.518750 73.174699 "
]
},
"execution_count": 147,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"decomp = df_with_gaps.copy()\n",
"for den, num in zip(funnel, funnel[1:]):\n",
" decomp[f'{num}_over_{den}'] = (df_with_gaps[num] / df_with_gaps[den]).fillna(0)\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" <th>impressions_lag</th>\n",
" <th>clicks_over_impressions_lag</th>\n",
" <th>conversions_over_clicks_lag</th>\n",
" <th>revenue_over_conversions_lag</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>150.0</td>\n",
" <td>120.0</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>150.0</td>\n",
" <td>150.0</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>120.0</td>\n",
" <td>160.0</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" <td>1000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.8</td>\n",
" <td>71.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2000.0</td>\n",
" <td>0.075</td>\n",
" <td>1.0</td>\n",
" <td>62.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>400.0</td>\n",
" <td>166.0</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50.0</td>\n",
" <td>20.0</td>\n",
" <td>10.0</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>300.0</td>\n",
" <td>150.0</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500.0</td>\n",
" <td>250.0</td>\n",
" <td>125.0</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500.0</td>\n",
" <td>1000.0</td>\n",
" <td>500.0</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" <td>50.0</td>\n",
" <td>0.400</td>\n",
" <td>0.5</td>\n",
" <td>50.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150.0</td>\n",
" <td>323.0</td>\n",
" <td>145.0</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" <td>2000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.5</td>\n",
" <td>76.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>320.0</td>\n",
" <td>166.0</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>70.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month year \\\n",
"0 2018-01-01 A 1000.0 150.0 120.0 8600.0 01 2018 \n",
"4 2018-01-01 B 2000.0 150.0 150.0 9400.0 01 2018 \n",
"8 2018-01-01 C 0.0 0.0 0.0 0.0 01 2018 \n",
"1 2019-01-01 A 1000.0 120.0 160.0 9055.0 01 2019 \n",
"5 2019-01-01 B 0.0 0.0 0.0 0.0 01 2019 \n",
"9 2019-01-01 C 2000.0 400.0 166.0 10147.0 01 2019 \n",
"2 2018-02-01 A 50.0 20.0 10.0 500.0 02 2018 \n",
"6 2018-02-01 B 2000.0 300.0 150.0 11400.0 02 2018 \n",
"10 2018-02-01 C 2500.0 250.0 125.0 8750.0 02 2018 \n",
"3 2019-02-01 A 2500.0 1000.0 500.0 50000.0 02 2019 \n",
"7 2019-02-01 B 2150.0 323.0 145.0 10739.0 02 2019 \n",
"11 2019-02-01 C 2000.0 320.0 166.0 12147.0 02 2019 \n",
"\n",
" clicks_over_impressions conversions_over_clicks \\\n",
"0 0.150000 0.800000 \n",
"4 0.075000 1.000000 \n",
"8 0.000000 0.000000 \n",
"1 0.120000 1.333333 \n",
"5 0.000000 0.000000 \n",
"9 0.200000 0.415000 \n",
"2 0.400000 0.500000 \n",
"6 0.150000 0.500000 \n",
"10 0.100000 0.500000 \n",
"3 0.400000 0.500000 \n",
"7 0.150233 0.448916 \n",
"11 0.160000 0.518750 \n",
"\n",
" revenue_over_conversions impressions_lag clicks_over_impressions_lag \\\n",
"0 71.666667 NaN NaN \n",
"4 62.666667 NaN NaN \n",
"8 0.000000 NaN NaN \n",
"1 56.593750 1000.0 0.150 \n",
"5 0.000000 2000.0 0.075 \n",
"9 61.126506 0.0 0.000 \n",
"2 50.000000 NaN NaN \n",
"6 76.000000 NaN NaN \n",
"10 70.000000 NaN NaN \n",
"3 100.000000 50.0 0.400 \n",
"7 74.062069 2000.0 0.150 \n",
"11 73.174699 2500.0 0.100 \n",
"\n",
" conversions_over_clicks_lag revenue_over_conversions_lag \n",
"0 NaN NaN \n",
"4 NaN NaN \n",
"8 NaN NaN \n",
"1 0.8 71.666667 \n",
"5 1.0 62.666667 \n",
"9 0.0 0.000000 \n",
"2 NaN NaN \n",
"6 NaN NaN \n",
"10 NaN NaN \n",
"3 0.5 50.000000 \n",
"7 0.5 76.000000 \n",
"11 0.5 70.000000 "
]
},
"execution_count": 148,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for rate_name in rate_names:\n",
" decomp[f'{rate_name}_lag'] = decomp.groupby(dimensions)[rate_name].shift(1)\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>dim</th>\n",
" <th>impressions</th>\n",
" <th>clicks</th>\n",
" <th>conversions</th>\n",
" <th>revenue</th>\n",
" <th>month</th>\n",
" <th>year</th>\n",
" <th>clicks_over_impressions</th>\n",
" <th>conversions_over_clicks</th>\n",
" <th>revenue_over_conversions</th>\n",
" <th>impressions_lag</th>\n",
" <th>clicks_over_impressions_lag</th>\n",
" <th>conversions_over_clicks_lag</th>\n",
" <th>revenue_over_conversions_lag</th>\n",
" <th>impressions_contribution</th>\n",
" <th>clicks_over_impressions_contribution</th>\n",
" <th>conversions_over_clicks_contribution</th>\n",
" <th>revenue_over_conversions_contribution</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2018-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>150.0</td>\n",
" <td>120.0</td>\n",
" <td>8600.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.800000</td>\n",
" <td>71.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2018-01-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>150.0</td>\n",
" <td>150.0</td>\n",
" <td>9400.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.075000</td>\n",
" <td>1.000000</td>\n",
" <td>62.666667</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>2018-01-01</td>\n",
" <td>C</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2018</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2019-01-01</td>\n",
" <td>A</td>\n",
" <td>1000.0</td>\n",
" <td>120.0</td>\n",
" <td>160.0</td>\n",
" <td>9055.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.120000</td>\n",
" <td>1.333333</td>\n",
" <td>56.593750</td>\n",
" <td>1000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.8</td>\n",
" <td>71.666667</td>\n",
" <td>0.0</td>\n",
" <td>-1720.0</td>\n",
" <td>4586.666667</td>\n",
" <td>-2411.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>2019-01-01</td>\n",
" <td>B</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>2000.0</td>\n",
" <td>0.075</td>\n",
" <td>1.0</td>\n",
" <td>62.666667</td>\n",
" <td>-9400.0</td>\n",
" <td>-0.0</td>\n",
" <td>-0.000000</td>\n",
" <td>-0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>2019-01-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>400.0</td>\n",
" <td>166.0</td>\n",
" <td>10147.0</td>\n",
" <td>01</td>\n",
" <td>2019</td>\n",
" <td>0.200000</td>\n",
" <td>0.415000</td>\n",
" <td>61.126506</td>\n",
" <td>0.0</td>\n",
" <td>0.000</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>10147.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2018-02-01</td>\n",
" <td>A</td>\n",
" <td>50.0</td>\n",
" <td>20.0</td>\n",
" <td>10.0</td>\n",
" <td>500.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>50.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>2018-02-01</td>\n",
" <td>B</td>\n",
" <td>2000.0</td>\n",
" <td>300.0</td>\n",
" <td>150.0</td>\n",
" <td>11400.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.150000</td>\n",
" <td>0.500000</td>\n",
" <td>76.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>2018-02-01</td>\n",
" <td>C</td>\n",
" <td>2500.0</td>\n",
" <td>250.0</td>\n",
" <td>125.0</td>\n",
" <td>8750.0</td>\n",
" <td>02</td>\n",
" <td>2018</td>\n",
" <td>0.100000</td>\n",
" <td>0.500000</td>\n",
" <td>70.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2019-02-01</td>\n",
" <td>A</td>\n",
" <td>2500.0</td>\n",
" <td>1000.0</td>\n",
" <td>500.0</td>\n",
" <td>50000.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.400000</td>\n",
" <td>0.500000</td>\n",
" <td>100.000000</td>\n",
" <td>50.0</td>\n",
" <td>0.400</td>\n",
" <td>0.5</td>\n",
" <td>50.000000</td>\n",
" <td>24500.0</td>\n",
" <td>0.0</td>\n",
" <td>0.000000</td>\n",
" <td>25000.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>2019-02-01</td>\n",
" <td>B</td>\n",
" <td>2150.0</td>\n",
" <td>323.0</td>\n",
" <td>145.0</td>\n",
" <td>10739.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.150233</td>\n",
" <td>0.448916</td>\n",
" <td>74.062069</td>\n",
" <td>2000.0</td>\n",
" <td>0.150</td>\n",
" <td>0.5</td>\n",
" <td>76.000000</td>\n",
" <td>855.0</td>\n",
" <td>19.0</td>\n",
" <td>-1254.000000</td>\n",
" <td>-281.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>2019-02-01</td>\n",
" <td>C</td>\n",
" <td>2000.0</td>\n",
" <td>320.0</td>\n",
" <td>166.0</td>\n",
" <td>12147.0</td>\n",
" <td>02</td>\n",
" <td>2019</td>\n",
" <td>0.160000</td>\n",
" <td>0.518750</td>\n",
" <td>73.174699</td>\n",
" <td>2500.0</td>\n",
" <td>0.100</td>\n",
" <td>0.5</td>\n",
" <td>70.000000</td>\n",
" <td>-1750.0</td>\n",
" <td>4200.0</td>\n",
" <td>420.000000</td>\n",
" <td>527.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" timestamp dim impressions clicks conversions revenue month year \\\n",
"0 2018-01-01 A 1000.0 150.0 120.0 8600.0 01 2018 \n",
"4 2018-01-01 B 2000.0 150.0 150.0 9400.0 01 2018 \n",
"8 2018-01-01 C 0.0 0.0 0.0 0.0 01 2018 \n",
"1 2019-01-01 A 1000.0 120.0 160.0 9055.0 01 2019 \n",
"5 2019-01-01 B 0.0 0.0 0.0 0.0 01 2019 \n",
"9 2019-01-01 C 2000.0 400.0 166.0 10147.0 01 2019 \n",
"2 2018-02-01 A 50.0 20.0 10.0 500.0 02 2018 \n",
"6 2018-02-01 B 2000.0 300.0 150.0 11400.0 02 2018 \n",
"10 2018-02-01 C 2500.0 250.0 125.0 8750.0 02 2018 \n",
"3 2019-02-01 A 2500.0 1000.0 500.0 50000.0 02 2019 \n",
"7 2019-02-01 B 2150.0 323.0 145.0 10739.0 02 2019 \n",
"11 2019-02-01 C 2000.0 320.0 166.0 12147.0 02 2019 \n",
"\n",
" clicks_over_impressions conversions_over_clicks \\\n",
"0 0.150000 0.800000 \n",
"4 0.075000 1.000000 \n",
"8 0.000000 0.000000 \n",
"1 0.120000 1.333333 \n",
"5 0.000000 0.000000 \n",
"9 0.200000 0.415000 \n",
"2 0.400000 0.500000 \n",
"6 0.150000 0.500000 \n",
"10 0.100000 0.500000 \n",
"3 0.400000 0.500000 \n",
"7 0.150233 0.448916 \n",
"11 0.160000 0.518750 \n",
"\n",
" revenue_over_conversions impressions_lag clicks_over_impressions_lag \\\n",
"0 71.666667 NaN NaN \n",
"4 62.666667 NaN NaN \n",
"8 0.000000 NaN NaN \n",
"1 56.593750 1000.0 0.150 \n",
"5 0.000000 2000.0 0.075 \n",
"9 61.126506 0.0 0.000 \n",
"2 50.000000 NaN NaN \n",
"6 76.000000 NaN NaN \n",
"10 70.000000 NaN NaN \n",
"3 100.000000 50.0 0.400 \n",
"7 74.062069 2000.0 0.150 \n",
"11 73.174699 2500.0 0.100 \n",
"\n",
" conversions_over_clicks_lag revenue_over_conversions_lag \\\n",
"0 NaN NaN \n",
"4 NaN NaN \n",
"8 NaN NaN \n",
"1 0.8 71.666667 \n",
"5 1.0 62.666667 \n",
"9 0.0 0.000000 \n",
"2 NaN NaN \n",
"6 NaN NaN \n",
"10 NaN NaN \n",
"3 0.5 50.000000 \n",
"7 0.5 76.000000 \n",
"11 0.5 70.000000 \n",
"\n",
" impressions_contribution clicks_over_impressions_contribution \\\n",
"0 NaN NaN \n",
"4 NaN NaN \n",
"8 NaN NaN \n",
"1 0.0 -1720.0 \n",
"5 -9400.0 -0.0 \n",
"9 0.0 0.0 \n",
"2 NaN NaN \n",
"6 NaN NaN \n",
"10 NaN NaN \n",
"3 24500.0 0.0 \n",
"7 855.0 19.0 \n",
"11 -1750.0 4200.0 \n",
"\n",
" conversions_over_clicks_contribution \\\n",
"0 NaN \n",
"4 NaN \n",
"8 NaN \n",
"1 4586.666667 \n",
"5 -0.000000 \n",
"9 0.000000 \n",
"2 NaN \n",
"6 NaN \n",
"10 NaN \n",
"3 0.000000 \n",
"7 -1254.000000 \n",
"11 420.000000 \n",
"\n",
" revenue_over_conversions_contribution \n",
"0 NaN \n",
"4 NaN \n",
"8 NaN \n",
"1 -2411.666667 \n",
"5 -0.000000 \n",
"9 10147.000000 \n",
"2 NaN \n",
"6 NaN \n",
"10 NaN \n",
"3 25000.000000 \n",
"7 -281.000000 \n",
"11 527.000000 "
]
},
"execution_count": 149,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"for i, _ in enumerate(rate_names):\n",
"\n",
" before = ' * '.join(rate_names[:i])\n",
" current = f'({rate_names[i]} - {rate_names[i]}_lag)'\n",
" after = ' * '.join(f'{x}_lag' for x in rate_names[i+1:])\n",
" formula = ' * '.join(filter(None, [before, current, after]))\n",
"\n",
" decomp[f'{rate_names[i]}_contribution'] = decomp.eval(formula)\n",
"\n",
"decomp\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"year\n",
"2018 0.0\n",
"2019 53438.0\n",
"dtype: float64"
]
},
"execution_count": 150,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(\n",
" decomp\n",
" .assign(year=lambda x: pd.to_datetime(x['timestamp']).dt.year)\n",
" .groupby('year')[contribution_columns].sum().sum(axis=1)\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment