Skip to content

Instantly share code, notes, and snippets.

@hamletbatista
Created April 20, 2019 19:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hamletbatista/389385cafa90f5a4a2a9739d94c55334 to your computer and use it in GitHub Desktop.
Save hamletbatista/389385cafa90f5a4a2a9739d94c55334 to your computer and use it in GitHub Desktop.
#See https://jingwen-z.github.io/data-viz-with-matplotlib-series6-venn-diagram/
import matplotlib.pyplot as plt
from matplotlib_venn import venn2
# Compare the "Keyword" columns of macys_df and tommy_df: print the overlap
# statistics, then draw and save a two-circle Venn diagram of the overlap.
# NOTE(review): macys_df and tommy_df are not defined in this snippet; they
# are presumably pandas DataFrames loaded earlier -- confirm against caller.

# Build the keyword sets first: every statement below depends on them.
grp1 = set(macys_df["Keyword"])  # Macys keywords
grp2 = set(tommy_df["Keyword"])  # Tommy Hilfiger keywords

#calculating percentages
total = grp1 | grp2 # set union
print(len(total)) # prints -> 4725
both = grp1 & grp2 # set intersection
print(len(both)) #prints -> 4086

if not total:
    # Fail with a clear message instead of a ZeroDivisionError further down.
    raise ValueError("No keywords found in either group; nothing to compare")

#difference from total
# Careful with the names: subtracting len(grp1) from the union leaves the
# keywords that are NOT in grp1, i.e. the ones found only in grp2 (Tommy),
# and vice versa. The names are kept so later code relying on them still works.
grp1_diff = len(total) - len(grp1)  # keywords only in grp2 (Tommy)
both_diff = len(both)               # keywords in both groups
grp2_diff = len(total) - len(grp2)  # keywords only in grp1 (Macys)

#percentages
grp1_diff_pct = grp1_diff / len(total) * 100
both_diff_pct = both_diff / len(total) * 100
grp2_diff_pct = grp2_diff / len(total) * 100
print(grp1_diff_pct) # prints -> 10.9%
print(both_diff_pct) #prints -> 86.5%
print(grp2_diff_pct) #prints -> 2.6%

# Plotting
fig = plt.figure()
fig.suptitle('Branded keywords overlap between Macys and Tommy Hilfiger')
fig.set_size_inches(18.5, 10.5)
v2 = venn2([grp1, grp2], set_labels = ('', ''))

# Region ids: '10' = only grp1 (Macys), '01' = only grp2 (Tommy), '11' = both.
# Label text is derived from the computed percentages so it cannot drift out
# of sync with the data.
region_styles = (
    ('10', 'yellow', 'Only Macys\n({:.1f}%)'.format(grp2_diff_pct)),
    ('01', 'red', 'Only Tommy\n({:.1f}%)'.format(grp1_diff_pct)),
    ('11', 'orange', 'Both\n({:.1f}%)'.format(both_diff_pct)),
)
for region_id, region_color, region_text in region_styles:
    # An empty region may have no patch/label object; skip it rather than
    # failing with AttributeError on None.
    region_patch = v2.get_patch_by_id(region_id)
    if region_patch is not None:
        region_patch.set_color(region_color)
        region_patch.set_edgecolor('none')
    region_label = v2.get_label_by_id(region_id)
    if region_label is not None:
        region_label.set_text(region_text)

# Save before showing: depending on the backend, the figure may no longer be
# intact once the interactive window has been closed.
fig.savefig('overlap.jpg') #save image locally
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment