Skip to content

Instantly share code, notes, and snippets.

View itsderek23's full-sized avatar

Derek Haynes itsderek23

View GitHub Profile
@itsderek23
itsderek23 / lost_keywords.py
Created July 6, 2019 20:26
SERP Analysis - Lost Keywords
# for each query, find min & max date
df_by_query_date = df.groupby("query").aggregate({"date": ["min","max"], "clicks": "sum",
"impressions": "sum", "position": "mean"}).sort_values(("impressions","sum"),ascending=False)
# filter to just queries in above the median in impressions
df_by_query_date = df_by_query_date[df_by_query_date[("impressions","sum")] >= df_by_query_date[("impressions","sum")].quantile(0.50)]
# find all queries that haven't appeared in the last two weeks
df_by_query_date[df_by_query_date[("date","max")] < datetime.datetime.now() - datetime.timedelta(days=14)].head(5)
@itsderek23
itsderek23 / overperforming_keywords.py
Last active July 5, 2019 16:37
SERP Analysis - Overperforming Keywords
# df_by_query is from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limit to queries with at least 1 click and in the 60th quartile or greater by position.
# Limit to queries with a CTR > the 0.5% quartile w/at least 1 click. This sounds low, but many queries will have zero clicks.
df_by_query_low_positions = df_by_query[(df_by_query.ctr > 0) & (df_by_query.position >= df_by_query.position.quantile(0.4))]
(df_by_query_low_positions[df_by_query_low_positions.ctr >= df_by_query[df_by_query.ctr > 0].ctr.quantile(0.05)]
.sort_values("impressions",ascending=False).head(10)
)
@itsderek23
itsderek23 / underperforming_keywords.py
Created July 5, 2019 16:08
SERP Analysis - Underperforming Keywords
# df_by_query is generated from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limits to queries with the top 20% of impressions
# Limits to queries in the bottom 20% of CTR
df_by_query_top_impressions = df_by_query[df_by_query.impressions >= df_by_query.impressions.quantile(0.8)]
(df_by_query_top_impressions[df_by_query_top_impressions.ctr <= df_by_query_top_impressions.ctr.quantile(0.2)]
.sort_values("impressions",ascending=False)
.head(10)
)
@itsderek23
itsderek23 / plot_ctr_by_position.py
Created July 5, 2019 15:29
SERP Analysis - Plot Click Through Rate (CTR) by Position
df_by_position = df.groupby(df.position.apply(lambda x: round(x, 0))).agg({
"clicks": "sum", "impressions": "sum"
})
df_by_position['ctr'] = (df_by_position.clicks / df_by_position.impressions)*100
df_by_position[df_by_position.index <= 20].ctr.plot(title="Click Through Rate (CTR) by Position")
@itsderek23
itsderek23 / metrics_by_query.py
Created July 5, 2019 15:20
SERP Analysis - Clicks, impressions, position, and CTR by query
df_by_query = (df
.groupby("query")
.agg({"clicks": "sum", "impressions": "sum", "position": "mean"})
.sort_values("clicks",ascending=False))
df_by_query["ctr"] = (df_by_query.clicks / df_by_query.impressions)*100
df_by_query.head(5)
@itsderek23
itsderek23 / plot_total_clicks_over_time.py
Last active July 5, 2019 15:15
SERP Analysis - Plot Total Clicks over Time
df.groupby("date").agg({"clicks": "sum"}).plot(title="Total Clicks over Time")
@itsderek23
itsderek23 / hubspot_contacts_by_state.py
Created June 5, 2019 20:58
Hubspot Contacts by state with Pandas and PetalData
(contacts.df
.groupby("ip_state")
.agg({"created": "count"})
.sort_values(by="created",ascending=False)
).rename(columns={"created": "number_of_contacts"})
@itsderek23
itsderek23 / hubspot_contacts_plot.py
Created June 5, 2019 20:40
Plot of Hubspot Total Contacts over Time w/Pandas
import matplotlib
import matplotlib.pyplot as plt
grouped = contacts.df.groupby(pd.Grouper(key="created",freq="M")).agg({"created": "count"})
grouped.rename(columns={"created": "new_contacts"},inplace=True)
grouped.cumsum().plot(title="Total Hubspot Contacts")
@itsderek23
itsderek23 / hubspot_signups_by_month.py
Last active June 5, 2019 20:39
Hubspot Signups by Month w/ Pandas and PetalData
grouped = df.groupby(pd.Grouper(key="created",freq="M")).agg({"created": "count"})
grouped.rename(columns={"created": "new_contacts"},inplace=True)
@itsderek23
itsderek23 / export_hubspot_contacts.py
Created June 5, 2019 19:55
Export all Hubspot contacts with PetalData
import petaldata
# https://petaldata.app/datasets/hubspot/#auth
petaldata.datasets.hubspot.api_key = "[YOUR HUBSPOT API KEY]"
contacts = petaldata.datasets.hubspot.Contacts()
# Will download ALL hubspot contacts.
# See download options at https://petaldata.app/datasets/hubspot/#contacts-download.
contacts.download()
contacts.save() # Save locally so we don't need to download everything again.