muokicaleb/missing_val

## missing_val
'''
Assume that some Item_Weight values are missing in the DataFrame.
Looking at the columns, we notice there is an Item_Identifier column; similar items share the same identifier.
We can build a dictionary mapping each identifier to its average weight, then look up the missing value for each row.
'''
# Build a lookup table holding the mean Item_Weight for every Item_Identifier.
# pivot_table aggregates with the mean by default. The table is built from
# `data`, the same frame whose missing weights are filled at the end, so the
# lookup and the fill always agree on which rows they describe.

item_avg_weight = data.pivot_table(values='Item_Weight', index='Item_Identifier')

# To fill with the mode of a field instead of the average, pass a custom
# aggfunc, e.g.:
#
#     outlet_size_mode = data.pivot_table(values='Outlet_Size',
#                                         columns='Outlet_Type',
#                                         aggfunc=lambda x: x.mode().iat[0])

# DataFrame.to_dict() returns a nested dict of the form
# {column_name: {index_value: cell_value}}.

dic = item_avg_weight.to_dict()

# Keep only the inner dict for the single value column:
# {Item_Identifier: mean Item_Weight}.

dic2 = dic['Item_Weight']

# Fill only the null weights: map() looks up each row's identifier in dic2,
# and fillna() uses that looked-up value wherever Item_Weight is missing.
# Identifiers absent from dic2 map to NaN, so those rows stay missing.

data['Item_Weight'] = data['Item_Weight'].fillna(data['Item_Identifier'].map(dic2))
	'''
	Assume that some Item_Weight values are missing in the DataFrame.
	Looking at the columns, we notice there is an Item_Identifier column; similar items share the same identifier.
	We can build a dictionary mapping each identifier to its average weight, then look up the missing value for each row.
	'''
	# Build a lookup table holding the mean Item_Weight for every Item_Identifier.
	# pivot_table aggregates with the mean by default. The table is built from
	# `data`, the same frame whose missing weights are filled at the end, so the
	# lookup and the fill always agree on which rows they describe.

	item_avg_weight = data.pivot_table(values='Item_Weight', index='Item_Identifier')

	# To fill with the mode of a field instead of the average, pass a custom
	# aggfunc, e.g.:
	#
	#     outlet_size_mode = data.pivot_table(values='Outlet_Size',
	#                                         columns='Outlet_Type',
	#                                         aggfunc=lambda x: x.mode().iat[0])

	# DataFrame.to_dict() returns a nested dict of the form
	# {column_name: {index_value: cell_value}}.

	dic = item_avg_weight.to_dict()

	# Keep only the inner dict for the single value column:
	# {Item_Identifier: mean Item_Weight}.

	dic2 = dic['Item_Weight']

	# Fill only the null weights: map() looks up each row's identifier in dic2,
	# and fillna() uses that looked-up value wherever Item_Weight is missing.
	# Identifiers absent from dic2 map to NaN, so those rows stay missing.

	data['Item_Weight'] = data['Item_Weight'].fillna(data['Item_Identifier'].map(dic2))