This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For each query, find its first/last date seen plus total clicks,
# total impressions, and mean position.
# (Trailing " | |" scrape artifacts removed — they made the lines invalid Python.)
df_by_query_date = (
    df.groupby("query")
    .aggregate({"date": ["min", "max"], "clicks": "sum",
                "impressions": "sum", "position": "mean"})
    .sort_values(("impressions", "sum"), ascending=False)
)
# Keep only queries at or above the median total-impression count.
df_by_query_date = df_by_query_date[
    df_by_query_date[("impressions", "sum")]
    >= df_by_query_date[("impressions", "sum")].quantile(0.50)
]
# Show the first 5 queries that haven't appeared in the last two weeks.
# NOTE(review): datetime.now() is timezone-naive — assumes the "date" column
# is naive as well; confirm against the upstream parsing.
df_by_query_date[
    df_by_query_date[("date", "max")]
    < datetime.datetime.now() - datetime.timedelta(days=14)
].head(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# df_by_query is from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Keep queries with at least one click (ctr > 0) whose mean position is at or
# above the 40th percentile of positions (i.e. they rank lower on the page).
df_by_query_low_positions = df_by_query[
    (df_by_query.ctr > 0)
    & (df_by_query.position >= df_by_query.position.quantile(0.4))
]
# Of those, keep queries whose CTR is at least the 5th-percentile CTR among
# all queries with a click. This bar sounds low, but many queries have zero
# clicks, so it filters meaningfully.
(
    df_by_query_low_positions[
        df_by_query_low_positions.ctr
        >= df_by_query[df_by_query.ctr > 0].ctr.quantile(0.05)
    ]
    .sort_values("impressions", ascending=False)
    .head(10)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# df_by_query is generated from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Find high-visibility, low-engagement queries:
#   1. limit to queries in the top 20% by impressions,
#   2. of those, limit to the bottom 20% by CTR,
# then show the ten with the most impressions.
df_by_query_top_impressions = df_by_query[
    df_by_query.impressions >= df_by_query.impressions.quantile(0.8)
]
(
    df_by_query_top_impressions[
        df_by_query_top_impressions.ctr
        <= df_by_query_top_impressions.ctr.quantile(0.2)
    ]
    .sort_values("impressions", ascending=False)
    .head(10)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Aggregate clicks and impressions by search position rounded to the nearest
# integer. Series.round(0) is the vectorized equivalent of the original
# apply(lambda x: round(x, 0)) and avoids a per-row Python call.
df_by_position = df.groupby(df.position.round(0)).agg({
    "clicks": "sum", "impressions": "sum"
})
# CTR as a percentage for each rounded position.
df_by_position["ctr"] = (df_by_position.clicks / df_by_position.impressions) * 100
# Plot CTR for the first 20 positions (roughly the first two result pages).
df_by_position[df_by_position.index <= 20].ctr.plot(
    title="Click Through Rate (CTR) by Position"
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Per-query totals: sum clicks and impressions, average the position,
# sorted with the most-clicked queries first.
df_by_query = (
    df
    .groupby("query")
    .agg({"clicks": "sum", "impressions": "sum", "position": "mean"})
    .sort_values("clicks", ascending=False)
)
# Derived click-through rate as a percentage.
df_by_query["ctr"] = (df_by_query.clicks / df_by_query.impressions) * 100
df_by_query.head(5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Daily total clicks, rendered as a time series.
daily_clicks = df.groupby("date").agg({"clicks": "sum"})
daily_clicks.plot(title="Total Clicks over Time")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count contacts per IP-derived state, most contacts first, and present the
# count column under a descriptive name.
(
    contacts.df
    .groupby("ip_state")
    .agg({"created": "count"})
    .sort_values(by="created", ascending=False)
).rename(columns={"created": "number_of_contacts"})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib
import matplotlib.pyplot as plt

# Count new contacts per calendar month, then plot the running total.
# NOTE(review): freq="M" (month-end) is deprecated in newer pandas in favor
# of "ME" — confirm the pandas version before changing.
grouped = contacts.df.groupby(pd.Grouper(key="created", freq="M")).agg(
    {"created": "count"}
)
grouped.rename(columns={"created": "new_contacts"}, inplace=True)
grouped.cumsum().plot(title="Total Hubspot Contacts")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count rows created per calendar month ("created" column), labelling the
# count column "new_contacts".
grouped = df.groupby(pd.Grouper(key="created", freq="M")).agg({"created": "count"})
grouped.rename(columns={"created": "new_contacts"}, inplace=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import petaldata

# Authenticate against the Hubspot dataset.
# https://petaldata.app/datasets/hubspot/#auth
petaldata.datasets.hubspot.api_key = "[YOUR HUBSPOT API KEY]"
contacts = petaldata.datasets.hubspot.Contacts()
# Will download ALL hubspot contacts.
# See download options at https://petaldata.app/datasets/hubspot/#contacts-download.
contacts.download()
contacts.save()  # Save locally so we don't need to download everything again.