Skip to content

Instantly share code, notes, and snippets.

@TooTouch
Created December 10, 2020 06:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TooTouch/cb2ff423981e664ebbf3878cc3b1c535 to your computer and use it in GitHub Desktop.
Save TooTouch/cb2ff423981e664ebbf3878cc3b1c535 to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# Apply seaborn's "whitegrid" style globally before any figure is created.
sns.set_style("whitegrid")
import matplotlib as mpl
# Font override is placed after sns.set_style; presumably because seaborn
# styles also touch font settings and would otherwise overwrite it -- confirm.
# NOTE(review): NanumGothicCoding is assumed to be installed and is presumably
# chosen so the Korean labels used in this script render correctly.
plt.rcParams["font.family"] = 'NanumGothicCoding'
# Disable the Unicode minus glyph for axis text; presumably needed because the
# font above does not provide it -- confirm.
mpl.rcParams['axes.unicode_minus'] = False
# function
def cat_os_by_cluster_plot(cluster: int, data, save=False):
    """Draw one OS-share pie chart per category for a single cluster.

    Parameters
    ----------
    cluster : int
        Value of the ``k`` column that selects the rows to plot.
    data : pandas.DataFrame
        Must provide the columns ``category``, ``os``, ``os_rate`` and ``k``.
    save : bool, default False
        When true, the figure is also written to
        ``../images/sample/os_example.jpg`` (300 dpi) before it is shown.

    Notes
    -----
    One panel is created per distinct category in ``data`` (the figure is
    20x10 inches for five categories, as before). The panel in the middle
    carries the figure headline in addition to its category name.
    """
    categories = data['category'].unique()
    data_k = data[data['k'] == cluster]

    # Size the grid from the data instead of assuming exactly five categories.
    # At least one panel is kept so empty input still yields a (blank) figure.
    n_panels = max(len(categories), 1)
    fig, axes = plt.subplots(
        1, n_panels, figsize=(4 * n_panels, 10), squeeze=False
    )
    axes = axes[0]  # squeeze=False always returns a 2-D array, even for 1 panel
    headline_idx = len(categories) // 2

    for i, cat in enumerate(categories):
        rows = data_k[data_k['category'] == cat]
        # A category may have no rows for this cluster; leave its panel blank
        # rather than calling pie() with mismatched sizes/labels.
        if not rows.empty:
            sizes = rows['os_rate'].to_numpy()
            # Take labels from the same rows as the sizes so each wedge is
            # always paired with its own OS name.
            labels = rows['os'].tolist()
            # Pull the largest wedge(s) slightly out of the pie.
            explode = (sizes == sizes.max()).astype(int) / 10
            axes[i].pie(sizes, explode=explode, labels=labels, autopct='%.1f%%',
                        shadow=True, startangle=90, textprops={'fontsize': 16})
        axes[i].set(ylabel='', aspect='equal')
        if i == headline_idx:
            axes[i].set_title(f'소비트렌드 변화 상위 5개 OS 사용비율\n\n{cat.capitalize()}', size=20)
        else:
            # Leading blank lines keep all category names on the same baseline
            # as the panel that also carries the headline.
            axes[i].set_title(f'\n\n{cat.capitalize()}', size=20)

    plt.tight_layout()
    if save:
        plt.savefig('../images/sample/os_example.jpg', dpi=300)
    plt.show()
# Fixed seed so the synthetic sample below is reproducible.
np.random.seed(223)

# sample features
categories = ['TOP1 업종', 'TOP2 업종', 'TOP3 업종', 'TOP4 업종', 'TOP5 업종']
sex_age = [f'{sex}_{age}' for sex in ['F', 'M'] for age in range(10, 80, 10)]
os_cat = ['WINDOW', 'IOS', '안드로이드']
# Randomly assign every sex/age group to one of five clusters (k = 0..4).
cluster_df = pd.DataFrame({'sex_age': sex_age, 'k': np.random.randint(low=0, high=5, size=len(sex_age))})

# generate sample data
# Frames are collected and concatenated once; growing a DataFrame with
# pd.concat inside the loop copies all previous rows on every iteration.
# The random draws happen in the same order as before.
sample_frames = []
for sex_age_i in sex_age:
    for cat in categories:
        sample_i_df = pd.DataFrame({'os': os_cat, 'os_cnt': np.random.randint(low=1, high=100, size=3)})
        sample_i_df['sex_age'] = sex_age_i
        sample_i_df['category'] = cat
        sample_frames.append(sample_i_df)
sample_df = pd.concat(sample_frames, axis=0)

# Attach the cluster id, then total the OS counts per (category, os, cluster).
sample_df = pd.merge(sample_df, cluster_df, on='sex_age', how='left')
sample_df = sample_df.groupby(['category', 'os', 'k'])['os_cnt'].sum().reset_index()

# calculate os rate
# Share of each OS within its (category, cluster) group. A grouped transform
# avoids assigning into a filtered slice (SettingWithCopyWarning).
group_total = sample_df.groupby(['category', 'k'])['os_cnt'].transform('sum')
sample_df['os_rate'] = sample_df['os_cnt'] / group_total
# Order rows by category, then cluster, then OS, matching the order the
# per-group loop used to produce.
sample_df = sample_df.sort_values(['category', 'k', 'os'])
del group_total
sample_df = sample_df[['category', 'os', 'os_rate', 'k']]

# pie chart
cat_os_by_cluster_plot(cluster=0, data=sample_df, save=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment