Skip to content

Instantly share code, notes, and snippets.

View itsderek23's full-sized avatar

Derek Haynes itsderek23

View GitHub Profile
defmodule SlowLogger.Ecto do
require Logger
defmacro __using__(_args) do
quote unquote: false do
contents = quote do
require Logger
def aggregate(a,b,c) do
__trace(fn -> unquote(__MODULE__).aggregate(a,b,c) end)
@itsderek23
itsderek23 / scout_apm_absinthe_plug.ex
Last active March 4, 2020 06:58
Scout Absinthe (GraphQL) Instrumentation
defmodule ScoutApm.Absinthe.Plug do
alias ScoutApm.Internal.Layer
def init(default), do: default
def call(conn, _default) do
ScoutApm.TrackedRequest.start_layer("Controller", action_name(conn))
conn
|> Plug.Conn.register_before_send(&before_send/1)
@itsderek23
itsderek23 / remote.html
Created October 21, 2019 19:38
Demo Remote URL for Electron Hybrid App
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Remote Hello World!</title>
<link rel="stylesheet" href="index.css">
</head>
<body>
<h1>Say Hello!</h1>
<form>
@itsderek23
itsderek23 / impression_outliers_print.py
Created July 10, 2019 19:15
SERP Analysis - Abnormal Impressions by Query
from sklearn.ensemble import IsolationForest
def print_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / impression_outliers_plot.py
Created July 10, 2019 19:01
SERP Analysis - Plot Impression Outliers
from sklearn.ensemble import IsolationForest
def plot_anomalies(query,column):
df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
x=df_anom[column].values
xx = np.linspace(df_anom[column].min(), df_anom[column].max(), len(df)).reshape(-1,1)
isolation_forest = IsolationForest(n_estimators=100)
isolation_forest.fit(x.reshape(-1, 1))
@itsderek23
itsderek23 / top_ten_by_click.py
Created July 10, 2019 18:54
SERP Analysis - Top 10 Queries by Click
# Index labels of the ten rows of df_by_query with the most clicks,
# ordered from most to fewest clicks.
top_queries_by_clicks = df_by_query.sort_values(by="clicks", ascending=False).index.values[:10]
@itsderek23
itsderek23 / gsc_csvs_to_dataframe.py
Created July 10, 2019 18:37
SERP Analysis - Load CSV into a Pandas Dataframe
import os
import re
import dateparser
import pandas as pd
# [keys, row['clicks'], row['impressions'], row['ctr'], row['position']]
# cpu steal,3.0,4.0,0.75,1.0,gsc_property,worldwide,mobile,
# Column position -> column name, in the order of the sample row shown above.
# NOTE(review): presumably used to rename the columns of each loaded CSV; the
# usage is outside this view — confirm.
HEADERS = dict(enumerate(
    ["query", "clicks", "impressions", "ctr", "position", "property", "location", "device"]
))
@itsderek23
itsderek23 / lost_keywords.py
Created July 6, 2019 20:26
SERP Analysis - Lost Keywords
# Aggregate per query: earliest and latest date, total clicks, total impressions
# and mean position. Rows are ordered by total impressions, largest first.
df_by_query_date = (
    df.groupby("query")
    .agg({"date": ["min", "max"], "clicks": "sum", "impressions": "sum", "position": "mean"})
    .sort_values(by=("impressions", "sum"), ascending=False)
)
# Keep only the queries whose total impressions are at or above the median.
df_by_query_date = df_by_query_date.loc[
    df_by_query_date[("impressions", "sum")]
    >= df_by_query_date[("impressions", "sum")].quantile(0.50)
]
# Show up to five queries whose most recent date is more than 14 days before now,
# i.e. queries that have not appeared in the last two weeks.
df_by_query_date.loc[
    df_by_query_date[("date", "max")] < datetime.datetime.now() - datetime.timedelta(days=14)
].head(5)
@itsderek23
itsderek23 / overperforming_keywords.py
Last active July 5, 2019 16:37
SERP Analysis - Overperforming Keywords
# df_by_query comes from https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Step 1: keep queries with a non-zero CTR whose position value is at or above the
# 40th percentile of position (quantile(0.4)), i.e. roughly the 60% of queries with
# the numerically largest positions.
df_by_query_low_positions = df_by_query[
    (df_by_query["ctr"] > 0)
    & (df_by_query["position"] >= df_by_query["position"].quantile(0.4))
]
# Step 2: of those, keep queries whose CTR is at or above the 5th percentile of CTR
# among all non-zero-CTR queries, then show the ten with the most impressions.
# The threshold sounds low, but many queries have zero clicks.
# NOTE(review): quantile(0.05) is the 5th percentile, not 0.5% — confirm the intended cut-off.
(
    df_by_query_low_positions[
        df_by_query_low_positions["ctr"]
        >= df_by_query.loc[df_by_query["ctr"] > 0, "ctr"].quantile(0.05)
    ]
    .sort_values(by="impressions", ascending=False)
    .head(10)
)
@itsderek23
itsderek23 / underperforming_keywords.py
Created July 5, 2019 16:08
SERP Analysis - Underperforming Keywords
# df_by_query is generated by https://gist.github.com/itsderek23/41cd10943201e7b664619c6cd15f409d
# Step 1: keep queries whose impressions are at or above the 80th percentile
# (the top 20% by impressions).
df_by_query_top_impressions = df_by_query[
    df_by_query["impressions"] >= df_by_query["impressions"].quantile(0.8)
]
# Step 2: within that subset, keep queries whose CTR is at or below the subset's
# 20th percentile (the bottom 20% by CTR) and show the ten with the most impressions.
(
    df_by_query_top_impressions[
        df_by_query_top_impressions["ctr"] <= df_by_query_top_impressions["ctr"].quantile(0.2)
    ]
    .sort_values(by="impressions", ascending=False)
    .head(10)
)