-
-
Save KTG1/e91068d3cd2c8e880daf41f2e716aa00 to your computer and use it in GitHub Desktop.
Create a heatmap of SERPs, using a table with columns: "keyword", "rank", and "domain"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly.graph_objects as go | |
import pandas as pd | |
def serp_heatmap(df, num_domains=10, select_domain=None): | |
df = df.rename(columns={'domain': 'displayLink', | |
'searchTerms': 'keyword'}) | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_domains = df['displayLink'].value_counts()[:num_domains].index.tolist() | |
top_df = df[df['displayLink'].isin(top_domains) & df['displayLink'].ne('')] | |
top_df_counts_means = (top_df | |
.groupby('displayLink', as_index=False) | |
.agg({'rank': ['count', 'mean']})) | |
top_df_counts_means.columns = ['displayLink', 'rank_count', 'rank_mean'] | |
top_df = (pd.merge(top_df, top_df_counts_means) | |
.sort_values(['rank_count', 'rank_mean'], | |
ascending=[False, True])) | |
rank_counts = (top_df | |
.groupby(['displayLink', 'rank']) | |
.agg({'rank': ['count']}) | |
.reset_index()) | |
rank_counts.columns = ['displayLink', 'rank', 'count'] | |
summary = (df | |
.groupby(['displayLink'], as_index=False) | |
.agg({'rank': ['count', 'mean']}) | |
.sort_values(('rank', 'count'), ascending=False) | |
.assign(coverage=lambda df: (df[('rank', 'count')] | |
.div(df[('rank', 'count')] | |
.sum())))) | |
summary.columns = ['displayLink', 'count', 'avg_rank', 'coverage'] | |
summary['displayLink'] = summary['displayLink'].str.replace('www.', '', regex=True) | |
summary['avg_rank'] = summary['avg_rank'].round(1) | |
summary['coverage'] = (summary['coverage'].mul(100) | |
.round(1).astype(str).add('%')) | |
# num_queries might need to be set manually if you have the same query | |
# repeated across countries, languages, etc | |
num_queries = df['keyword'].nunique() | |
fig = go.Figure() | |
fig.add_scatter(x=top_df['displayLink'].str.replace('www\.', '', regex=True), | |
y=top_df['rank'], mode='markers', | |
marker={'size': 30, 'opacity': 1/rank_counts['count'].max()}) | |
fig.add_scatter(x=rank_counts['displayLink'].str.replace('www\.', '', regex=True), | |
y=rank_counts['rank'], mode='text', | |
text=rank_counts['count']) | |
for domain in rank_counts['displayLink'].unique(): | |
rank_counts_subset = rank_counts[rank_counts['displayLink'] == domain] | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[0], mode='text', | |
marker={'size': 50}, | |
text=str(rank_counts_subset['count'].sum())) | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[-1], mode='text', | |
text=format(rank_counts_subset['count'].sum() / num_queries, '.1%')) | |
fig.add_scatter(x=[domain.replace('www.', '')], | |
y=[-2], mode='text', | |
marker={'size': 50}, | |
text=str(round(rank_counts_subset['rank'] | |
.mul(rank_counts_subset['count']) | |
.sum() / rank_counts_subset['count'] | |
.sum(), 2))) | |
minrank, maxrank = int(min(top_df['rank'].unique())), int(max(top_df['rank'].unique())) | |
fig.layout.yaxis.tickvals = [-2, -1, 0] + list(range(minrank, maxrank+1)) | |
fig.layout.yaxis.ticktext = ['Avg. Pos.', 'Coverage', 'Total<br>appearances'] + list(range(minrank, maxrank+1)) | |
fig.layout.height = max([600, 100 + ((maxrank - minrank) * 50)]) | |
fig.layout.yaxis.title = 'SERP Rank (number of appearances)' | |
fig.layout.showlegend = False | |
fig.layout.paper_bgcolor = '#eeeeee' | |
fig.layout.plot_bgcolor = '#eeeeee' | |
fig.layout.autosize = False | |
fig.layout.margin.r = 2 | |
fig.layout.margin.l = 120 | |
fig.layout.margin.pad = 0 | |
fig.layout.hovermode = False | |
fig.layout.yaxis.autorange = 'reversed' | |
fig.layout.yaxis.zeroline = False | |
fig.layout.width = 1100 | |
return fig |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment