Skip to content

Instantly share code, notes, and snippets.

@itsderek23
Created July 10, 2019 19:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itsderek23/058359ff66dd4627e54bf0989fda56c5 to your computer and use it in GitHub Desktop.
Save itsderek23/058359ff66dd4627e54bf0989fda56c5 to your computer and use it in GitHub Desktop.
SERP Analysis - Plot Impression Outliers
from sklearn.ensemble import IsolationForest
def plot_anomalies(query, column):
    """Plot IsolationForest anomaly scores for one query's desktop rows.

    Rows of the module-level ``df`` whose ``query`` equals *query* and whose
    ``device`` is ``'desktop'`` are selected. An IsolationForest is fitted on
    the values of *column* and then evaluated on an evenly spaced grid
    spanning the observed min..max of that column. The anomaly score is
    drawn as a line and the grid regions predicted as outliers are shaded
    red.

    Args:
        query: Value matched against ``df['query']``.
        column: Name of the numeric column of ``df`` to analyse.

    Returns:
        None. The figure is drawn on a new matplotlib figure. If no rows
        match, nothing is plotted.
    """
    df_anom = df[(df['query'] == query) & (df['device'] == 'desktop')]
    if len(df_anom) == 0:
        # Nothing to fit: an empty selection would make linspace/fit fail
        # and abort a caller that is looping over many queries.
        return

    x = df_anom[column].values
    # Evaluation grid across the observed value range. The number of grid
    # points is len(df) (the full frame, not the filtered selection); it only
    # controls the plot resolution.
    xx = np.linspace(
        df_anom[column].min(), df_anom[column].max(), len(df)
    ).reshape(-1, 1)

    isolation_forest = IsolationForest(n_estimators=100)
    isolation_forest.fit(x.reshape(-1, 1))
    anomaly_score = isolation_forest.decision_function(xx)
    # predict() labels: 1 = inlier, -1 = outlier
    outlier = isolation_forest.predict(xx)

    plt.figure(figsize=(10, 4))
    plt.plot(xx, anomaly_score, label='anomaly score')
    # Shade the full score range wherever the grid point is an outlier.
    plt.fill_between(
        xx.T[0],
        np.min(anomaly_score),
        np.max(anomaly_score),
        where=outlier == -1,
        color='r',
        alpha=.4,
        label='outlier region',
    )
    plt.legend()
    plt.ylabel('anomaly score')
    plt.xlabel(column)
    plt.title(f"{query} {column} Anomalies")
# Draw the impressions anomaly plot for every query in top_queries_by_clicks.
for top_query in top_queries_by_clicks:
    plot_anomalies(top_query, 'impressions')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment