Created
January 17, 2019 10:27
-
-
Save semyont/dd2a261f262e92b0a07b2c2195eeec2e to your computer and use it in GitHub Desktop.
Pandas Group By Aggregate CTR Weighted Average on Campaigns Mediums Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
__author__='Semyon Teplisky' | |
import functools | |
import pandas as pd | |
import numpy as np | |
raw_data = [{'campaign_id':1111,'medium_id':'msn','campaign_name': 'viral #1', 'clicks':100, 'ctr':0.7}, | |
{'campaign_id':1111,'medium_id':'aol','campaign_name': 'viral #1', 'clicks':250, 'ctr':0.4}, | |
{'campaign_id':1111,'medium_id':'google','campaign_name': 'viral #1', 'clicks':500, 'ctr':0.1}] | |
data = pd.DataFrame(raw_data) | |
# Option 1 Advanced with functools | |
def wavg(val, df, weight): | |
try: | |
w = df.ix[val.index][weight] | |
return (val * w).sum() / w.sum() | |
except ZeroDivisionError: | |
return val.mean() | |
# Binding df to function with functools | |
fwavg = functools.partial(wavg, df=data, weight='clicks') | |
gdata = data.groupby(['campaign_id']).agg({"conversions":np.sum, "impressions":np.sum, | |
"spend":np.sum,"clicks":np.sum, "ctr":fwavg}).reset_index() | |
gdata.fillna(0.0, inplace=True) | |
gdata.head() | |
# Option 2 Math Workaround | |
data['ctr'] = data['clicks'] * data['ctr'] | |
gdata = data.groupby(['campaign_id']).agg({"conversions":np.sum, "impressions":np.sum, | |
"spend":np.sum,"clicks":np.sum, "ctr":np.sum}).reset_index() | |
gdata['ctr'] = gdata['ctr'] / gdata['clicks'] | |
gdata.head() | |
# Option 3 Two DF | |
def wavg(group, avg_name, weight_name): | |
val = group[avg_name] | |
w = group[weight_name] | |
try: | |
return (val * w).sum() / w.sum() | |
except ZeroDivisionError: | |
return val.mean() | |
gdata = data.groupby(['campaign_id']) | |
data_ctr = gdata.apply(wavg, "ctr", "clicks") | |
data_meta = gdata.agg({"conversions":np.sum, "impressions":np.sum, "spend":np.sum,"clicks":np.sum}) | |
gdata = pd.merge(data_meta, data_ctr, on='campaign_id') | |
gdata.head() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment