Skip to content

Instantly share code, notes, and snippets.

@itsderek23
itsderek23 / booklet_ai_policy.json
Created Mar 19, 2020
Booklet.ai AWS JSON Policy
View booklet_ai_policy.json
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": [
"sagemaker:ListEndpointConfigs",
"sagemaker:DescribeEndpointConfig",
"sagemaker:ListModels",
@itsderek23
itsderek23 / remote.html
Created Oct 21, 2019
Demo Remote URL for Electron Hybrid App
View remote.html
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Remote Hello World!</title>
<link rel="stylesheet" href="index.css">
</head>
<body>
<h1>Say Hello!</h1>
<form>
@itsderek23
itsderek23 / impression_outliers_print.py
Created Jul 10, 2019
SERP Analysis - Abnormal Impressions by Query
View impression_outliers_print.py
from sklearn.ensemble import IsolationForest
def print_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / impression_outliers_plot.py
Created Jul 10, 2019
SERP Analysis - Plot Impression Outliers
View impression_outliers_plot.py
from sklearn.ensemble import IsolationForest
def plot_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / top_ten_by_click.py
Created Jul 10, 2019
SERP Analysis - Top 10 Queries by Click
View top_ten_by_click.py
top_queries_by_clicks = (df_by_query
.sort_values("clicks", ascending=False)
.head(10)
.index.values
)
@itsderek23
itsderek23 / gsc_csvs_to_dataframe.py
Created Jul 10, 2019
SERP Analysis - Load CSV into a Pandas Dataframe
View gsc_csvs_to_dataframe.py
import os
import re
import dateparser
import pandas as pd
# [keys, row['clicks'], row['impressions'], row['ctr'], row['position']]
# cpu steal,3.0,4.0,0.75,1.0,gsc_property,worldwide,mobile,
HEADERS = {0:"query", 1: "clicks", 2: "impressions", 3: "ctr", 4: "position", 5: "property",
6: "location", 7: "device"}
@itsderek23
itsderek23 / lost_keywords.py
Created Jul 6, 2019
SERP Analysis - Lost Keywords
View lost_keywords.py
# for each query, find min & max date
df_by_query_date = df.groupby("query").aggregate({"date": ["min","max"], "clicks": "sum",
"impressions": "sum", "position": "mean"}).sort_values(("impressions","sum"),ascending=False)
# filter to just queries in above the median in impressions
df_by_query_date = df_by_query_date[df_by_query_date[("impressions","sum")] >= df_by_query_date[("impressions","sum")].quantile(0.50)]
# find all queries that haven't appeared in the last two weeks
df_by_query_date[df_by_query_date[("date","max")] < datetime.datetime.now() - datetime.timedelta(days=14)].head(5)
@itsderek23
itsderek23 / overperforming_keywords.py
Last active Jul 5, 2019
SERP Analysis - Overperforming Keywords
View overperforming_keywords.py
# df_by_query is from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limit to queries with at least 1 click and in the 60th quartile or greater by position.
# Limit to queries with a CTR > the 0.5% quartile w/at least 1 click. This sounds low, but many queries will have zero clicks.
df_by_query_low_positions = df_by_query[(df_by_query.ctr > 0) & (df_by_query.position >= df_by_query.position.quantile(0.4))]
(df_by_query_low_positions[df_by_query_low_positions.ctr >= df_by_query[df_by_query.ctr > 0].ctr.quantile(0.05)]
.sort_values("impressions",ascending=False).head(10)
)
@itsderek23
itsderek23 / underperforming_keywords.py
Created Jul 5, 2019
SERP Analysis - Underperforming Keywords
View underperforming_keywords.py
# df_by_query is generated from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Limits to queries with the top 20% of impressions
# Limits to queries in the bottom 20% of CTR
df_by_query_top_impressions = df_by_query[df_by_query.impressions >= df_by_query.impressions.quantile(0.8)]
(df_by_query_top_impressions[df_by_query_top_impressions.ctr <= df_by_query_top_impressions.ctr.quantile(0.2)]
.sort_values("impressions",ascending=False)
.head(10)
)
@itsderek23
itsderek23 / plot_ctr_by_position.py
Created Jul 5, 2019
SERP Analysis - Plot Click Through Rate (CTR) by Position
View plot_ctr_by_position.py
df_by_position = df.groupby(df.position.apply(lambda x: round(x, 0))).agg({
"clicks": "sum", "impressions": "sum"
})
df_by_position['ctr'] = (df_by_position.clicks / df_by_position.impressions)*100
df_by_position[df_by_position.index <= 20].ctr.plot(title="Click Through Rate (CTR) by Position")
You can’t perform that action at this time.