# netsatsawat/hr_feature_engineering.py

## hr_feature_engineering.py
def create_generation_feature(age_val: int) -> str:
    """
    Map an age onto the label of the generation band it falls in.

    The age is compared against ascending upper bounds and the first band
    it fits under wins.
    @Args:
      age_val (int): the age value from data frame

    Return:
      'Millenials' for ages below 37, 'Generation X' for ages below 54,
      'Boomers' for ages below 73, and 'Silent' for everything else
    """
    # Guard clauses ordered from the youngest band to the oldest; anything
    # that fails every comparison falls through to the last label.
    if age_val < 37:
        return 'Millenials'
    if age_val < 54:
        return 'Generation X'
    if age_val < 73:
        return 'Boomers'
    return 'Silent'


def create_job_hop_index(df: pd.DataFrame, total_exp_col: str,
                         num_prev_com_col: str) -> tuple:
    """
    Function to compute the first-job indicator and the job hopper index
    @Args:
      df (pd.DataFrame): pandas data frame
      total_exp_col (str): the name of the column containing total experience
      num_prev_com_col (str): the name of the column containing numbers of previous companies worked

    Return:
      A tuple (first_job_ind, job_hop_idx) of arrays with one entry per row:
        first_job_ind: 1 where the number of previous companies is 0, else 0
        job_hop_idx: 0.0 where the number of previous companies is 0,
          otherwise total experience divided by the number of previous
          companies
    """
    num_prev_com = df[num_prev_com_col]
    # Both outputs branch on the same condition, so build the mask once.
    is_first_job = num_prev_com == 0

    first_job_ind = np.where(is_first_job, 1, 0)
    # The division is evaluated for every row; np.where then substitutes 0.
    # on the rows where the divisor is zero.
    job_hop_idx = np.where(is_first_job,
                           0.,
                           df[total_exp_col] / num_prev_com)

    return first_job_ind, job_hop_idx


def compute_compa_ratio_feature(df: pd.DataFrame, salary_col: str,
                                lookup_df: pd.DataFrame = _MEDIAN_INCOME_LOOKUP,
                                col_lookup_list: list = _LOOKUP_COL):
    """
    Function to compute the compa-ratio of each employee.
      This is the employee's salary divided by the 'MedianIncome' value of
      the lookup row that matches the employee on the lookup columns.
    @Args:
      df: pandas data frame with the salary of each employee
      salary_col: the name of the column in df storing income
      lookup_df: pandas data frame storing the median salary in a
        'MedianIncome' column, keyed by the columns in col_lookup_list
      col_lookup_list: A list of column names, present in both df and
        lookup_df, used as the join keys

    Return:
      A new pandas data frame: df plus a 'compa_ratio' column, carrying the
      index values of df. Rows with no match in lookup_df get NaN (left join).
      The lookup's 'MedianIncome' and 'Count' columns are not kept.
    """
    # merge() hands back a fresh positional index, so the caller's index is
    # parked in a column during the join and restored afterwards.
    # reset_index() names that column after the index, or 'index' when the
    # index is unnamed.
    index_col = 'index' if df.index.name is None else df.index.name

    # Join against the lookup that was passed in, not the module defaults,
    # so the lookup_df / col_lookup_list arguments actually take effect.
    merge_df = (df.reset_index()
                  .merge(lookup_df, on=col_lookup_list, how="left")
                  .set_index(index_col))
    merge_df['compa_ratio'] = merge_df[salary_col] / merge_df['MedianIncome']

    # A caller-supplied lookup may not carry a 'Count' column; skip whatever
    # helper column is absent instead of raising.
    merge_df.drop(['MedianIncome', 'Count'], axis=1, inplace=True,
                  errors='ignore')
    return merge_df
	def create_generation_feature(age_val: int) -> str:
		"""
		Function to convert age value onto generation string
		@Args:
			age_val (int): the age value from data frame

		Return:
			String output specifies the generation: 'Millenials' for ages
			below 37, 'Generation X' for ages below 54, 'Boomers' for ages
			below 73, and 'Silent' for everything else
		"""
		# Upper bounds are checked in ascending order; the first match wins.
		if age_val < 37:
			return 'Millenials'
		if age_val < 54:
			return 'Generation X'
		if age_val < 73:
			return 'Boomers'
		return 'Silent'


	def create_job_hop_index(df: pd.DataFrame, total_exp_col: str,
			num_prev_com_col: str) -> tuple:
		"""
		Function to compute the first-job indicator and the job hopper index
		@Args:
			df (pd.DataFrame): pandas data frame
			total_exp_col (str): the name of the column containing total experience
			num_prev_com_col (str): the name of the column containing numbers of previous companies worked

		Return:
			A tuple (first_job_ind, job_hop_idx) of arrays with one entry per row:
				first_job_ind: 1 where the number of previous companies is 0, else 0
				job_hop_idx: 0.0 where the number of previous companies is 0,
					otherwise total experience divided by the number of
					previous companies
		"""
		num_prev_com = df[num_prev_com_col]
		# Both outputs branch on the same condition, so build the mask once.
		is_first_job = num_prev_com == 0

		first_job_ind = np.where(is_first_job, 1, 0)
		# The division is evaluated for every row; np.where then substitutes
		# 0. on the rows where the divisor is zero.
		job_hop_idx = np.where(is_first_job,
			0.,
			df[total_exp_col] / num_prev_com)

		return first_job_ind, job_hop_idx


	def compute_compa_ratio_feature(df: pd.DataFrame, salary_col: str,
			lookup_df: pd.DataFrame = _MEDIAN_INCOME_LOOKUP,
			col_lookup_list: list = _LOOKUP_COL):
		"""
		Function to compute the compa-ratio of each employee.
			This is the employee's salary divided by the 'MedianIncome' value
			of the lookup row that matches the employee on the lookup columns.
		@Args:
			df: pandas data frame with the salary of each employee
			salary_col: the name of the column in df storing income
			lookup_df: pandas data frame storing the median salary in a
				'MedianIncome' column, keyed by the columns in col_lookup_list
			col_lookup_list: A list of column names, present in both df and
				lookup_df, used as the join keys

		Return:
			A new pandas data frame: df plus a 'compa_ratio' column, carrying
			the index values of df. Rows with no match in lookup_df get NaN
			(left join). The lookup's 'MedianIncome' and 'Count' columns are
			not kept.
		"""
		# merge() hands back a fresh positional index, so the caller's index
		# is parked in a column during the join and restored afterwards.
		# reset_index() names that column after the index, or 'index' when
		# the index is unnamed.
		index_col = 'index' if df.index.name is None else df.index.name

		# Join against the lookup that was passed in, not the module
		# defaults, so the lookup_df / col_lookup_list arguments take effect.
		merge_df = (df.reset_index()
			.merge(lookup_df, on=col_lookup_list, how="left")
			.set_index(index_col))
		merge_df['compa_ratio'] = merge_df[salary_col] / merge_df['MedianIncome']

		# A caller-supplied lookup may not carry a 'Count' column; skip
		# whatever helper column is absent instead of raising.
		merge_df.drop(['MedianIncome', 'Count'], axis=1, inplace=True,
			errors='ignore')
		return merge_df