Created
December 10, 2020 06:36
-
-
Save TooTouch/cb2ff423981e664ebbf3878cc3b1c535 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
sns.set_style("whitegrid") | |
import matplotlib as mpl | |
plt.rcParams["font.family"] = 'NanumGothicCoding' | |
mpl.rcParams['axes.unicode_minus'] = False | |
# function | |
def cat_os_by_cluster_plot(cluster: int, data, save=False,
                           save_path='../images/sample/os_example.jpg'):
    """Draw one OS-share pie chart per category for a single cluster.

    Args:
        cluster: Value of the ``k`` column selecting the rows to plot.
        data: DataFrame with the columns ``category``, ``os``, ``k`` and
            ``os_rate``. One pie is drawn for every distinct ``category``
            found in ``data``; each wedge is one row of that category
            within the selected cluster.
        save: When true, the figure is written to ``save_path`` before it
            is shown.
        save_path: Destination passed to ``savefig`` when ``save`` is true.

    Raises:
        ValueError: If ``data`` holds no rows for ``cluster``.
    """
    categories = data['category'].unique()
    data_k = data[data['k'] == cluster]
    if data_k.empty:
        raise ValueError(f'no rows found for cluster {cluster!r}')

    n_categories = len(categories)
    # The shared headline is attached to the middle pie (index 2 of 5).
    headline_idx = n_categories // 2

    # squeeze=False keeps a 2-D axes array even when there is one category.
    fig, axes = plt.subplots(1, n_categories, figsize=(20, 10), squeeze=False)

    for i, (ax, cat) in enumerate(zip(axes[0], categories)):
        rows = data_k[data_k['category'] == cat]
        sizes = rows['os_rate']
        if not sizes.empty:
            # Pull the largest wedge(s) slightly out of the pie.
            explode = (sizes == sizes.max()).astype(int) / 10
            # Labels are taken from the very rows that supply the sizes so
            # each wedge is always paired with its own OS name.
            ax.pie(sizes, explode=explode, labels=rows['os'].tolist(),
                   autopct='%.1f%%', shadow=True, startangle=90,
                   textprops={'fontsize': 16})
        ax.set(ylabel='', aspect='equal')

        if i == headline_idx:
            ax.set_title(f'소비트렌드 변화 상위 5개 OS 사용비율\n\n{cat.capitalize()}', size=20)
        else:
            # Leading blank lines keep the category names vertically aligned
            # with the one under the headline.
            ax.set_title(f'\n\n{cat.capitalize()}', size=20)

    plt.tight_layout()
    if save:
        fig.savefig(save_path, dpi=300)
    plt.show()
np.random.seed(223)  # it's my birthday

# Sample features: five top categories, fourteen sex/age groups
# (F/M x ages 10..70 in steps of 10) and three operating systems.
categories = ['TOP1 업종', 'TOP2 업종', 'TOP3 업종', 'TOP4 업종', 'TOP5 업종']
sex_age = [f'{sex}_{age}' for sex in ['F', 'M'] for age in range(10, 80, 10)]
os_cat = ['WINDOW', 'IOS', '안드로이드']

# Assign every sex/age group a random cluster id k in [0, 5).
cluster_df = pd.DataFrame({
    'sex_age': sex_age,
    'k': np.random.randint(low=0, high=5, size=len(sex_age)),
})

# Generate sample data: a random count in [1, 100) per OS for every
# (sex/age group, category) pair. The frames are collected first and
# concatenated once instead of growing a DataFrame inside the loops; the
# iteration order (groups outer, categories inner) fixes the order of the
# random draws.
sample_frames = [
    pd.DataFrame({
        'os': os_cat,
        'os_cnt': np.random.randint(low=1, high=100, size=len(os_cat)),
        'sex_age': sex_age_i,
        'category': cat,
    })
    for sex_age_i in sex_age
    for cat in categories
]
sample_df = pd.concat(sample_frames, axis=0)

# Attach each group's cluster and total the counts per (category, os, k).
sample_df = pd.merge(sample_df, cluster_df, on='sex_age', how='left')
sample_df = sample_df.groupby(['category', 'os', 'k'])['os_cnt'].sum().reset_index()

# Calculate os rate: each OS count divided by the total count of its
# (category, k) group, so the rates of a group sum to 1. transform('sum')
# broadcasts the group total back to every row, which avoids assigning
# into filtered slices and handles whichever cluster ids were drawn.
group_total = sample_df.groupby(['category', 'k'])['os_cnt'].transform('sum')
sample_df['os_rate'] = sample_df['os_cnt'] / group_total

# Order rows by category, then cluster, then OS, and keep only the
# columns the plotting function reads.
sample_df = sample_df.sort_values(['category', 'k', 'os'])
sample_df = sample_df[['category', 'os', 'os_rate', 'k']]
del group_total, sample_frames
# Render the per-category OS pie charts for cluster 0; display only, no file.
cat_os_by_cluster_plot(data=sample_df, cluster=0, save=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment