hamletbatista
plot_macys_tommy_wordcloud.py
from collections import Counter
import re
import nltk
from nltk.corpus import stopwords
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt
nltk.download('stopwords')
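The preview above shows only the imports. As a minimal sketch (not the gist's actual code) of how they might be used, assuming macys_set is the keyword set built in macys_tommy_overlap.py:
nltk_stopwords = set(stopwords.words("english"))
all_stopwords = STOPWORDS.union(nltk_stopwords)
# split every keyword phrase into words and count them, skipping stopwords
words = [w for kw in macys_set for w in re.findall(r"[a-z']+", kw.lower()) if w not in all_stopwords]
word_counts = Counter(words)
# generate and plot the word cloud from the word frequencies
wordcloud = WordCloud(stopwords=all_stopwords, background_color="white").generate_from_frequencies(word_counts)
plt.imshow(wordcloud, interpolation="bilinear")
plt.axis("off")
plt.show()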
macys_tommy_better_rankings_traffic.py
# Traffic_x is the estimated traffic column coming from the left (Macy's) frame of the merge
macys_ranks_better.groupby("Keyword").agg({"Traffic_x": np.sum})["Traffic_x"].sum() # Output -> 75026
tommy_ranks_better.groupby("Keyword").agg({"Traffic_x": np.sum})["Traffic_x"].sum() # Output -> 66415
tommy_better_rankings.py
# If Tommy's position number is lower (say 1 vs. 6), it is ranking higher/better
tommy_ranks_better = merged_df.query("Position_x > Position_y")
len(set(tommy_ranks_better["Keyword"])) # we have 3173 better rankings
macys_better_rankings.py
# If Macy's position number is lower (say 1 vs. 6), it is ranking higher/better
macys_ranks_better = merged_df.query("Position_x < Position_y")
len(set(macys_ranks_better["Keyword"])) # we have 1075 better rankings
tommy_macys_datablend.py
# inner-join the two keyword exports on the keywords they have in common
merged_df = pd.merge(macys_df, tommy_df, how="inner", on="Keyword")
merged_df.groupby("Keyword").count().info() # we have 4086 unique entries
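Because both frames share the Position and Traffic column names, pandas suffixes Macy's columns with _x and Tommy's with _y, which is where Position_x/Position_y below come from. A tiny toy example (made-up data, not the real exports) shows the behavior:
import pandas as pd
left = pd.DataFrame({"Keyword": ["tommy hilfiger jacket"], "Position": [3], "Traffic": [120]})
right = pd.DataFrame({"Keyword": ["tommy hilfiger jacket"], "Position": [1], "Traffic": [450]})
print(pd.merge(left, right, how="inner", on="Keyword").columns.tolist())
# -> ['Keyword', 'Position_x', 'Traffic_x', 'Position_y', 'Traffic_y']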
macys_tomy_venn_diagram.py
# See https://jingwen-z.github.io/data-viz-with-matplotlib-series6-venn-diagram/
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# grp1 and grp2 hold the two branded keyword sets built in macys_tommy_overlap.py (below)
# calculating percentages
total = grp1.union(grp2) # set union
print(len(total)) # prints -> 4725
both = grp1 & grp2 # set intersection
print(len(both)) # prints -> 4086
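The preview stops before the diagram is drawn. A minimal sketch of the venn2 call, assuming grp1 and grp2 are the Macy's and Tommy keyword sets:
venn2(subsets=(len(grp1 - grp2), len(grp2 - grp1), len(both)),
      set_labels=("macys.com", "usa.tommy.com"))
plt.title("Tommy Hilfiger branded keyword overlap")
plt.show()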
macys_tommy_overlap.py
macys_set = set(macys_df["Keyword"]) # this eliminates duplicates
print(len(macys_set)) # prints -> 4210
tommy_set = set(tommy_df["Keyword"])
print(len(tommy_set)) # prints -> 4601
in_macys_only = macys_set - tommy_set # in macys but not in tommy
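The counts reconcile with basic set arithmetic, |A ∪ B| = |A| + |B| - |A ∩ B|. A quick check using the sets built above (in_tommy_only and in_both are names introduced just for this check):
in_tommy_only = tommy_set - macys_set # in tommy but not in macys
in_both = macys_set & tommy_set
# from the counts above: 4210 - 4086 = 124 keywords only on Macy's, 4601 - 4086 = 515 only on Tommy
print(len(in_macys_only), len(in_tommy_only), len(in_both))
print(len(macys_set) + len(tommy_set) - len(in_both)) # 4210 + 4601 - 4086 = 4725, the union size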
pull_tommy_keywords.py
database="us"
tommy="usa.tommy.com"
brand="Tommy Hilfiger"
tommy_df = get_seo_branded_data(brand, tommy, export_columns="Ph,Po,Tg") # only keyword, position and traffic
#we explicitly convert numbers to integers to be able to perform arithmetic operations later
convert_dict = {'Keyword': str, 'Position': int, 'Traffic': int}
tommy_df = tommy_df.astype(convert_dict)
pull_macys_keywords.py
database="us"
macys="macys.com"
brand="Tommy Hilfiger"
macys_df = get_seo_branded_data(brand, macys, export_columns="Ph,Po,Tg") # only keyword, position and traffic
#we explicitly convert numbers to integers to be able to perform arithmetic operations later
convert_dict = {'Keyword': str, 'Position': int, 'Traffic': int}
macys_df = macys_df.astype(convert_dict)
get_seo_branded_data.py (created Apr 20, 2019)
Get branded SEO data using the Semrush API
import requests
from urllib.parse import urlencode, urlparse, urlunparse, quote
import pandas as pd

def get_seo_branded_data(brand, domain, database="us", export_columns="Ph,Po,Nq,Ur,Tg,Td,Ts", display_limit=10000, display_filter="+|Ph|Co|{brand}"):
    global key
    url_params = {"type": "domain_organic",
                  "key": key,