# tkazusa/TargetEncoding_with_confidence.py

## TargetEncoding_with_confidence.py
for cols in ATTRIBUTION_CATEGORIES:
    # NOTE(review): `group_object`, `new_feature` and `log_group` are not
    # defined in this snippet; presumably they are set per `cols` before this
    # point -- confirm against the full script.

    # Aggregation function applied to each group's `is_attributed` values.
    def rate_calculation(x):
        """Return the group's attributed rate scaled by a confidence factor.

        Args:
            x: The `is_attributed` values of one group; must provide
                `.sum()` and `.count()` (presumably a pandas Series --
                confirm against how `group_object` is built).

        Returns:
            `x.sum() / x.count()` multiplied by
            `min(1, log(x.count()) / log_group)`.
        """
        count = x.count()
        rate = x.sum() / float(count)
        # The confidence factor grows with log(count) and is capped at 1, so
        # it is below 1 whenever log(count) < log_group.
        conf = np.min([1, np.log(count) / log_group])
        return rate * conf

    # Compute one scaled rate per group and expose it as a regular column
    # named `new_feature`.
    group_rates = (
        group_object['is_attributed']
        .apply(rate_calculation)
        .reset_index()
        .rename(index=str, columns={'is_attributed': new_feature})
    )

    # Perform the merge: left-join the per-group rate back onto X_train using
    # the grouping columns as the key.
    X_train = X_train.merge(
        group_rates[cols + [new_feature]],
        on=cols,
        how='left',
    )
	# NOTE(review): this loop repeats the loop that precedes it in this file
	# token for token; it looks like a paste/extraction artifact -- confirm
	# before running both, since the merge would then be applied twice.
	for cols in ATTRIBUTION_CATEGORIES:
		# NOTE(review): `group_object`, `new_feature` and `log_group` are not
		# defined in this snippet; presumably they are set per `cols` before
		# this point -- confirm against the full script.

		# Aggregation function applied to each group's `is_attributed` values.
		def rate_calculation(x):
			"""Return the group's attributed rate scaled by a confidence factor.

			Args:
				x: The `is_attributed` values of one group; must provide
					`.sum()` and `.count()` (presumably a pandas Series --
					confirm against how `group_object` is built).

			Returns:
				`x.sum() / x.count()` multiplied by
				`min(1, log(x.count()) / log_group)`.
			"""
			count = x.count()
			rate = x.sum() / float(count)
			# The confidence factor grows with log(count) and is capped at 1.
			conf = np.min([1, np.log(count) / log_group])
			return rate * conf

		# Compute one scaled rate per group and expose it as a regular column
		# named `new_feature`.
		group_rates = (
			group_object['is_attributed']
			.apply(rate_calculation)
			.reset_index()
			.rename(index=str, columns={'is_attributed': new_feature})
		)

		# Perform the merge: left-join the per-group rate back onto X_train
		# using the grouping columns as the key.
		X_train = X_train.merge(
			group_rates[cols + [new_feature]],
			on=cols,
			how='left',
		)