Skip to content

Instantly share code, notes, and snippets.

@zerebom
Last active December 21, 2020 03:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zerebom/8879d55959ecf20f10eb1146274a121d to your computer and use it in GitHub Desktop.
Save zerebom/8879d55959ecf20f10eb1146274a121d to your computer and use it in GitHub Desktop.
pandas
import warnings
from itertools import cycle

from IPython.core.interactiveshell import InteractiveShell

# NOTE(review): `pd` (pandas) and `plt` (matplotlib.pyplot) are used below but
# are not imported in this snippet -- presumably an earlier notebook cell
# provides them; confirm before running this file standalone.

# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

# Use the fully qualified option keys: the short patterns 'max_columns' /
# 'max_rows' also match the 'styler.render.*' options on pandas >= 1.4 and
# make set_option raise OptionError (pattern matched multiple keys).
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)

# Have IPython display the value of every expression statement in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Plot styling: 'bmh' style sheet, plus its color list both as a plain list
# and as an endless iterator.
plt.style.use('bmh')
color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']
color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])
def feature_encoding(train, test, category_col, target_col, func_list):
    """Add per-category statistics of a feature, computed over train + test.

    Works like target encoding, but is meant for an important feature column
    (e.g. an area column) rather than the label: the rows of ``test`` are
    included when the statistics are computed.

    Args:
        train: Training DataFrame; new columns are added to it in place.
        test: Test DataFrame; new columns are added to it in place.
        category_col: Name of the column to group by.
        target_col: Name of the column to aggregate within each group.
        func_list: Aggregations to apply (names such as ``"mean"`` or
            callables). A single aggregation is accepted as well as a list.

    Returns:
        The same ``(train, test)`` objects, each with one extra column per
        aggregation named ``"<category_col>_<target_col>_<func>"``.
        Categories that never occur in the combined data map to NaN.
    """
    # Accept a single aggregation (e.g. "mean") as well as a list of them.
    if isinstance(func_list, str) or callable(func_list):
        funcs = [func_list]
    else:
        funcs = list(func_list)
    if not funcs:
        # Nothing to compute, so there are no columns to add.
        return train, test

    data = pd.concat([train, test], axis=0, ignore_index=True)

    # A list passed to SeriesGroupBy.agg gives one column per function.
    # (A dict here, as in {target_col: funcs}, is rejected by pandas >= 1.0
    # with "nested renamer is not supported".)
    agg_df = data.groupby(category_col)[target_col].agg(funcs)
    agg_df.columns = [
        f"{category_col}_{target_col}_{name}" for name in agg_df.columns
    ]

    # Look up each row's category in the aggregated table.
    for col in agg_df.columns:
        train[col] = train[category_col].map(agg_df[col])
        test[col] = test[category_col].map(agg_df[col])
    return train, test
def target_encoding(train, test, category_col, target_col, func_list):
    """Target-encode ``category_col`` using statistics from ``train`` only.

    For every aggregation in ``func_list`` the target is aggregated per
    category on the training rows, and the result is mapped onto both
    frames as a new column.

    Note: the statistics for a training row include that row's own target
    value (no out-of-fold scheme is applied here).

    Args:
        train: Training DataFrame containing ``category_col`` and
            ``target_col``; new columns are added to it in place.
        test: Test DataFrame containing ``category_col``; new columns are
            added to it in place.
        category_col: Name of the column to group by.
        target_col: Name of the column to aggregate within each group.
        func_list: Aggregations to apply (names such as ``"mean"`` or
            callables). A single aggregation is accepted as well as a list.

    Returns:
        The same ``(train, test)`` objects, each with one extra column per
        aggregation named ``"<category_col>_<target_col>_<func>"``.
        Categories absent from ``train`` map to NaN.
    """
    # Accept a single aggregation (e.g. "mean") as well as a list of them.
    if isinstance(func_list, str) or callable(func_list):
        funcs = [func_list]
    else:
        funcs = list(func_list)
    if not funcs:
        # Nothing to compute, so there are no columns to add.
        return train, test

    # A list passed to SeriesGroupBy.agg gives one column per function.
    # (A dict here, as in {target_col: funcs}, is rejected by pandas >= 1.0
    # with "nested renamer is not supported".)
    agg_df = train.groupby(category_col)[target_col].agg(funcs)
    agg_df.columns = [
        f"{category_col}_{target_col}_{name}" for name in agg_df.columns
    ]

    # Look up each row's category in the aggregated table.
    for col in agg_df.columns:
        train[col] = train[category_col].map(agg_df[col])
        test[col] = test[category_col].map(agg_df[col])
    return train, test
def groupby_map(df, new_col, by_col, target_col, agg):
    """Add a column holding a per-group aggregate of ``target_col``.

    Groups ``df`` by ``by_col``, aggregates ``target_col`` with ``agg`` and
    writes, for every row, the aggregate of that row's group into
    ``df[new_col]``. ``df`` is modified in place and nothing is returned.

    Args:
        df: DataFrame to read from and to add the new column to.
        new_col: Name of the column to create (or overwrite).
        by_col: Name of the column to group by.
        target_col: Name of the column to aggregate.
        agg: Aggregation accepted by pandas ``agg`` (e.g. ``"mean"`` or a
            callable), or a dict spec keyed by column name.
    """
    if isinstance(agg, dict):
        # A dict spec addresses columns by name, so it has to be applied at
        # the frame level before the target column is picked out.
        stats = df.groupby(by_col).agg(agg)[target_col]
    else:
        # Aggregate only the target column: aggregating every column is
        # wasted work and fails when other columns do not support `agg`
        # (e.g. "mean" on a string column).
        stats = df.groupby(by_col)[target_col].agg(agg)
    df[new_col] = df[by_col].map(stats)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment