Skip to content

Instantly share code, notes, and snippets.

Derek Haynes itsderek23

Block or report user

Report or block itsderek23

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@itsderek23
itsderek23 / impression_outliers_print.py
Created Jul 10, 2019
SERP Analysis - Abnormal Impressions by Query
View impression_outliers_print.py
from sklearn.ensemble import IsolationForest
def print_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / impression_outliers_plot.py
Created Jul 10, 2019
SERP Analysis - Plot Impression Outliers
View impression_outliers_plot.py
from sklearn.ensemble import IsolationForest
def plot_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / top_ten_by_click.py
Created Jul 10, 2019
SERP Analysis - Top 10 Queries by Click
View top_ten_by_click.py
top_queries_by_clicks = (df_by_query
.sort_values("clicks", ascending=False)
.head(10)
.index.values
)
@itsderek23
itsderek23 / gsc_csvs_to_dataframe.py
Created Jul 10, 2019
SERP Analysis - Load CSV into a Pandas Dataframe
View gsc_csvs_to_dataframe.py
import os
import re
import dateparser
import pandas as pd
# [keys, row['clicks'], row['impressions'], row['ctr'], row['position']]
# cpu steal,3.0,4.0,0.75,1.0,gsc_property,worldwide,mobile,
HEADERS = {0:"query", 1: "clicks", 2: "impressions", 3: "ctr", 4: "position", 5: "property",
6: "location", 7: "device"}
@itsderek23
itsderek23 / lost_keywords.py
Created Jul 6, 2019
SERP Analysis - Lost Keywords
View lost_keywords.py
# for each query, find min & max date
df_by_query_date = df.groupby("query").aggregate({"date": ["min","max"], "clicks": "sum",
"impressions": "sum", "position": "mean"}).sort_values(("impressions","sum"),ascending=False)
# filter to just queries in above the median in impressions
df_by_query_date = df_by_query_date[df_by_query_date[("impressions","sum")] >= df_by_query_date[("impressions","sum")].quantile(0.50)]
# find all queries that haven't appeared in the last two weeks
df_by_query_date[df_by_query_date[("date","max")] < datetime.datetime.now() - datetime.timedelta(days=14)].head(5)
@itsderek23
itsderek23 / overperforming_keywords.py
Last active Jul 5, 2019
SERP Analysis - Overperforming Keywords
View overperforming_keywords.py
# df_by_query is from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limit to queries with at least 1 click and in the 60th quartile or greater by position.
# Limit to queries with a CTR > the 0.5% quartile w/at least 1 click. This sounds low, but many queries will have zero clicks.
df_by_query_low_positions = df_by_query[(df_by_query.ctr > 0) & (df_by_query.position >= df_by_query.position.quantile(0.4))]
(df_by_query_low_positions[df_by_query_low_positions.ctr >= df_by_query[df_by_query.ctr > 0].ctr.quantile(0.05)]
.sort_values("impressions",ascending=False).head(10)
)
@itsderek23
itsderek23 / underperforming_keywords.py
Created Jul 5, 2019
SERP Analysis - Underperforming Keywords
View underperforming_keywords.py
# df_by_query is generated from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limits to queries with the top 20% of impressions
# Limits to queries in the bottom 20% of CTR
df_by_query_top_impressions = df_by_query[df_by_query.impressions >= df_by_query.impressions.quantile(0.8)]
(df_by_query_top_impressions[df_by_query_top_impressions.ctr <= df_by_query_top_impressions.ctr.quantile(0.2)]
.sort_values("impressions",ascending=False)
.head(10)
)
@itsderek23
itsderek23 / plot_ctr_by_position.py
Created Jul 5, 2019
SERP Analysis - Plot Click Through Rate (CTR) by Position
View plot_ctr_by_position.py
df_by_position = df.groupby(df.position.apply(lambda x: round(x, 0))).agg({
"clicks": "sum", "impressions": "sum"
})
df_by_position['ctr'] = (df_by_position.clicks / df_by_position.impressions)*100
df_by_position[df_by_position.index <= 20].ctr.plot(title="Click Through Rate (CTR) by Position")
@itsderek23
itsderek23 / metrics_by_query.py
Created Jul 5, 2019
SERP Analysis - Clicks, impressions, position, and CTR by query
View metrics_by_query.py
df_by_query = (df
.groupby("query")
.agg({"clicks": "sum", "impressions": "sum", "position": "mean"})
.sort_values("clicks",ascending=False))
df_by_query["ctr"] = (df_by_query.clicks / df_by_query.impressions)*100
df_by_query.head(5)
@itsderek23
itsderek23 / plot_total_clicks_over_time.py
Last active Jul 5, 2019
SERP Analysis - Plot Total Clicks over Time
View plot_total_clicks_over_time.py
df.groupby("date").agg({"clicks": "sum"}).plot(title="Total Clicks over Time")
You can’t perform that action at this time.