Skip to content

Instantly share code, notes, and snippets.

@KTG1
Forked from eliasdabbas/serp_heatmap.py
Created October 5, 2022 12:31
Show Gist options
  • Save KTG1/e91068d3cd2c8e880daf41f2e716aa00 to your computer and use it in GitHub Desktop.
Save KTG1/e91068d3cd2c8e880daf41f2e716aa00 to your computer and use it in GitHub Desktop.
Create a heatmap of SERPs, using a table with columns: "keyword", "rank", and "domain"
import plotly.graph_objects as go
import pandas as pd
def serp_heatmap(df, num_domains=10, select_domain=None):
df = df.rename(columns={'domain': 'displayLink',
'searchTerms': 'keyword'})
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist()
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist()
top_df = df[df['displayLink'].isin(top_domains) & df['displayLink'].ne('')]
top_df_counts_means = (top_df
.groupby('displayLink', as_index=False)
.agg({'rank': ['count', 'mean']}))
top_df_counts_means.columns = ['displayLink', 'rank_count', 'rank_mean']
top_df = (pd.merge(top_df, top_df_counts_means)
.sort_values(['rank_count', 'rank_mean'],
ascending=[False, True]))
rank_counts = (top_df
.groupby(['displayLink', 'rank'])
.agg({'rank': ['count']})
.reset_index())
rank_counts.columns = ['displayLink', 'rank', 'count']
summary = (df
.groupby(['displayLink'], as_index=False)
.agg({'rank': ['count', 'mean']})
.sort_values(('rank', 'count'), ascending=False)
.assign(coverage=lambda df: (df[('rank', 'count')]
.div(df[('rank', 'count')]
.sum()))))
summary.columns = ['displayLink', 'count', 'avg_rank', 'coverage']
summary['displayLink'] = summary['displayLink'].str.replace('www.', '', regex=True)
summary['avg_rank'] = summary['avg_rank'].round(1)
summary['coverage'] = (summary['coverage'].mul(100)
.round(1).astype(str).add('%'))
# num_queries might need to be set manually if you have the same query
# repeated across countries, languages, etc
num_queries = df['keyword'].nunique()
fig = go.Figure()
fig.add_scatter(x=top_df['displayLink'].str.replace('www\.', '', regex=True),
y=top_df['rank'], mode='markers',
marker={'size': 30, 'opacity': 1/rank_counts['count'].max()})
fig.add_scatter(x=rank_counts['displayLink'].str.replace('www\.', '', regex=True),
y=rank_counts['rank'], mode='text',
text=rank_counts['count'])
for domain in rank_counts['displayLink'].unique():
rank_counts_subset = rank_counts[rank_counts['displayLink'] == domain]
fig.add_scatter(x=[domain.replace('www.', '')],
y=[0], mode='text',
marker={'size': 50},
text=str(rank_counts_subset['count'].sum()))
fig.add_scatter(x=[domain.replace('www.', '')],
y=[-1], mode='text',
text=format(rank_counts_subset['count'].sum() / num_queries, '.1%'))
fig.add_scatter(x=[domain.replace('www.', '')],
y=[-2], mode='text',
marker={'size': 50},
text=str(round(rank_counts_subset['rank']
.mul(rank_counts_subset['count'])
.sum() / rank_counts_subset['count']
.sum(), 2)))
minrank, maxrank = int(min(top_df['rank'].unique())), int(max(top_df['rank'].unique()))
fig.layout.yaxis.tickvals = [-2, -1, 0] + list(range(minrank, maxrank+1))
fig.layout.yaxis.ticktext = ['Avg. Pos.', 'Coverage', 'Total<br>appearances'] + list(range(minrank, maxrank+1))
fig.layout.height = max([600, 100 + ((maxrank - minrank) * 50)])
fig.layout.yaxis.title = 'SERP Rank (number of appearances)'
fig.layout.showlegend = False
fig.layout.paper_bgcolor = '#eeeeee'
fig.layout.plot_bgcolor = '#eeeeee'
fig.layout.autosize = False
fig.layout.margin.r = 2
fig.layout.margin.l = 120
fig.layout.margin.pad = 0
fig.layout.hovermode = False
fig.layout.yaxis.autorange = 'reversed'
fig.layout.yaxis.zeroline = False
fig.layout.width = 1100
return fig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment