zerebom/00_readme.md

## 00_readme.md

      
    Raw
  

              00_readme.md
            
          
    [WIP]

  
## 01_EDA_init_import.py
# Notebook display setup for EDA.
# NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were already
# imported by an earlier cell -- this snippet does not import them itself.
import warnings
from itertools import cycle
# Silence all warnings so notebook output stays readable.
warnings.filterwarnings("ignore")
# Use fully-qualified option names: the bare 'max_columns' / 'max_rows'
# patterns also match 'styler.render.max_columns' / 'styler.render.max_rows'
# on newer pandas, and pandas raises OptionError when a pattern matches
# more than one option key.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 50)
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last.
InteractiveShell.ast_node_interactivity = "all"

plt.style.use('bmh')
# Colors of the active style: once as a list, once as an endless iterator.
color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']
color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

## 02_feature_encoding.py

def feature_encoding(train, test, category_col, target_col, func_list):
    """Add per-category aggregates of ``target_col`` to ``train`` and ``test``.

    Meant for important feature columns (e.g. area). Unlike
    ``target_encoding``, the statistics are computed over the rows of
    ``train`` and ``test`` concatenated, so values that appear in ``test``
    are included in the aggregation.

    Args:
        train: Training DataFrame; new columns are added to it in place.
        test: Test DataFrame; new columns are added to it in place.
        category_col: Name of the column to group by.
        target_col: Name of the column to aggregate.
        func_list: Aggregation function(s) accepted by pandas ``agg``
            (e.g. ``['mean', 'max']``). A single function name or callable
            is also accepted.

    Returns:
        The same ``train`` and ``test`` objects, each with one new column
        per function, named ``<category_col>_<target_col>_<function name>``.
    """
    # Normalize to a list so the aggregation result is always a DataFrame
    # with one column per function.
    if isinstance(func_list, str) or callable(func_list):
        funcs = [func_list]
    else:
        funcs = list(func_list)

    data = pd.concat([train, test], axis=0)

    # Pass the list directly: a dict spec on a single selected column is a
    # "nested renamer", which pandas >= 1.0 rejects with SpecificationError.
    agg_df = data.groupby(category_col)[target_col].agg(funcs)
    # Build the output column names.
    agg_df.columns = [
        category_col + '_' + f"{target_col}_{name}".strip()
        for name in agg_df.columns
    ]
    # Map each category value to its aggregate and store it as a new column.
    for col in agg_df.columns:
        train[col] = train[category_col].map(agg_df[col])
        test[col] = test[category_col].map(agg_df[col])
    return train, test

def target_encoding(train, test, category_col, target_col, func_list):
    """Target-encode ``category_col`` using statistics of ``target_col``.

    The aggregates are computed from ``train`` only and then mapped onto
    both frames. Category values of ``test`` that never occur in ``train``
    therefore receive NaN.

    NOTE(review): the statistics for a training row include that row's own
    target value (no out-of-fold scheme) -- confirm this is acceptable for
    the caller.

    Args:
        train: Training DataFrame containing ``target_col``; new columns
            are added to it in place.
        test: Test DataFrame; new columns are added to it in place.
        category_col: Name of the column to group by.
        target_col: Name of the column to aggregate.
        func_list: Aggregation function(s) accepted by pandas ``agg``
            (e.g. ``['mean']``). A single function name or callable is
            also accepted.

    Returns:
        The same ``train`` and ``test`` objects, each with one new column
        per function, named ``<category_col>_<target_col>_<function name>``.
    """
    # Normalize to a list so the aggregation result is always a DataFrame
    # with one column per function.
    if isinstance(func_list, str) or callable(func_list):
        funcs = [func_list]
    else:
        funcs = list(func_list)

    # Pass the list directly: a dict spec on a single selected column is a
    # "nested renamer", which pandas >= 1.0 rejects with SpecificationError.
    agg_df = train.groupby(category_col)[target_col].agg(funcs)
    # Build the output column names.
    agg_df.columns = [
        category_col + '_' + f"{target_col}_{name}".strip()
        for name in agg_df.columns
    ]
    # Map each category value to its aggregate and store it as a new column.
    for col in agg_df.columns:
        train[col] = train[category_col].map(agg_df[col])
        test[col] = test[category_col].map(agg_df[col])
    return train, test

## 03_groupby_map.py
def groupby_map(df, new_col, by_col, target_col, agg):
    """Add ``new_col`` to ``df``: the per-group aggregate of ``target_col``.

    Rows are grouped by ``by_col``, ``target_col`` is aggregated with
    ``agg``, and every row receives the aggregate of its own group.

    Args:
        df: DataFrame to modify in place.
        new_col: Name of the column to create (or overwrite).
        by_col: Name of the column to group by.
        target_col: Name of the column to aggregate.
        agg: Aggregation passed to pandas ``agg`` (e.g. ``'mean'``), or a
            dict spec keyed by column name that includes ``target_col``.

    Returns:
        None. ``df`` is modified in place.
    """
    grouped = df.groupby(by_col)
    if isinstance(agg, dict):
        # A dict spec names its own columns, so it has to be applied to the
        # whole grouped frame before picking out the target column.
        agg_series = grouped.agg(agg)[target_col]
    else:
        # Select the target column first so unrelated (e.g. non-numeric)
        # columns are never aggregated; they would make 'mean' and similar
        # functions fail, and aggregating them is wasted work.
        agg_series = grouped[target_col].agg(agg)
    df[new_col] = df[by_col].map(agg_series)
	# Notebook display setup for EDA (tab-indented copy of the init snippet).
	# NOTE: assumes `pd` (pandas) and `plt` (matplotlib.pyplot) were already
	# imported elsewhere -- this snippet does not import them itself.
	import warnings
	from itertools import cycle
	# Silence all warnings so notebook output stays readable.
	warnings.filterwarnings("ignore")
	# Use fully-qualified option names: the bare 'max_columns' / 'max_rows'
	# patterns also match 'styler.render.max_columns' / 'styler.render.max_rows'
	# on newer pandas, and pandas raises OptionError when a pattern matches
	# more than one option key.
	pd.set_option('display.max_columns', 50)
	pd.set_option('display.max_rows', 50)
	from IPython.core.interactiveshell import InteractiveShell
	# Display the value of every top-level expression in a cell, not only the last.
	InteractiveShell.ast_node_interactivity = "all"

	plt.style.use('bmh')
	# Colors of the active style: once as a list, once as an endless iterator.
	color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']
	color_cycle = cycle(plt.rcParams['axes.prop_cycle'].by_key()['color'])

	def feature_encoding(train, test, category_col, target_col, func_list):
		"""Add per-category aggregates of ``target_col`` to ``train`` and ``test``.

		Meant for important feature columns (e.g. area). Unlike
		``target_encoding``, the statistics are computed over the rows of
		``train`` and ``test`` concatenated, so values that appear in ``test``
		are included in the aggregation.

		Args:
			train: Training DataFrame; new columns are added to it in place.
			test: Test DataFrame; new columns are added to it in place.
			category_col: Name of the column to group by.
			target_col: Name of the column to aggregate.
			func_list: Aggregation function(s) accepted by pandas ``agg``
				(e.g. ``['mean', 'max']``). A single function name or
				callable is also accepted.

		Returns:
			The same ``train`` and ``test`` objects, each with one new column
			per function, named ``<category_col>_<target_col>_<function name>``.
		"""
		# Normalize to a list so the aggregation result is always a DataFrame
		# with one column per function.
		if isinstance(func_list, str) or callable(func_list):
			funcs = [func_list]
		else:
			funcs = list(func_list)

		data = pd.concat([train, test], axis=0)

		# Pass the list directly: a dict spec on a single selected column is a
		# "nested renamer", which pandas >= 1.0 rejects with SpecificationError.
		agg_df = data.groupby(category_col)[target_col].agg(funcs)
		# Build the output column names.
		agg_df.columns = [
			category_col + '_' + f"{target_col}_{name}".strip()
			for name in agg_df.columns
		]
		# Map each category value to its aggregate and store it as a new column.
		for col in agg_df.columns:
			train[col] = train[category_col].map(agg_df[col])
			test[col] = test[category_col].map(agg_df[col])
		return train, test

	def target_encoding(train, test, category_col, target_col, func_list):
		"""Target-encode ``category_col`` using statistics of ``target_col``.

		The aggregates are computed from ``train`` only and then mapped onto
		both frames. Category values of ``test`` that never occur in ``train``
		therefore receive NaN.

		Args:
			train: Training DataFrame containing ``target_col``; new columns
				are added to it in place.
			test: Test DataFrame; new columns are added to it in place.
			category_col: Name of the column to group by.
			target_col: Name of the column to aggregate.
			func_list: Aggregation function(s) accepted by pandas ``agg``
				(e.g. ``['mean']``). A single function name or callable is
				also accepted.

		Returns:
			The same ``train`` and ``test`` objects, each with one new column
			per function, named ``<category_col>_<target_col>_<function name>``.
		"""
		# Normalize to a list so the aggregation result is always a DataFrame
		# with one column per function.
		if isinstance(func_list, str) or callable(func_list):
			funcs = [func_list]
		else:
			funcs = list(func_list)

		# Pass the list directly: a dict spec on a single selected column is a
		# "nested renamer", which pandas >= 1.0 rejects with SpecificationError.
		agg_df = train.groupby(category_col)[target_col].agg(funcs)
		# Build the output column names.
		agg_df.columns = [
			category_col + '_' + f"{target_col}_{name}".strip()
			for name in agg_df.columns
		]
		# Map each category value to its aggregate and store it as a new column.
		for col in agg_df.columns:
			train[col] = train[category_col].map(agg_df[col])
			test[col] = test[category_col].map(agg_df[col])
		return train, test
	def groupby_map(df, new_col, by_col, target_col, agg):
		"""Add ``new_col`` to ``df``: the per-group aggregate of ``target_col``.

		Rows are grouped by ``by_col``, ``target_col`` is aggregated with
		``agg``, and every row receives the aggregate of its own group.

		Args:
			df: DataFrame to modify in place.
			new_col: Name of the column to create (or overwrite).
			by_col: Name of the column to group by.
			target_col: Name of the column to aggregate.
			agg: Aggregation passed to pandas ``agg`` (e.g. ``'mean'``), or a
				dict spec keyed by column name that includes ``target_col``.

		Returns:
			None. ``df`` is modified in place.
		"""
		grouped = df.groupby(by_col)
		if isinstance(agg, dict):
			# A dict spec names its own columns, so it has to be applied to
			# the whole grouped frame before picking out the target column.
			agg_series = grouped.agg(agg)[target_col]
		else:
			# Select the target column first so unrelated (e.g. non-numeric)
			# columns are never aggregated; they would make 'mean' and
			# similar functions fail, and aggregating them is wasted work.
			agg_series = grouped[target_col].agg(agg)
		df[new_col] = df[by_col].map(agg_series)