glemaitre/quantile_smoothing_noise.py

## quantile_smoothing_noise.py
import numpy as np

from sklearn.preprocessing import QuantileTransformer

# Demo: effect of `smoothing_noise` on QuantileTransformer when the training
# data contains a heavily repeated value.
#
# Toy single-feature dataset: one 0, seven 0.5 and two 1, reshaped into a
# column of shape (n_samples, 1).
X = np.array([0] * 1 + [0.5] * 7 + [1] * 2).reshape(-1, 1)

# Two almost identical query points: the repeated value itself and a value
# just below it. The printed label is derived from the value that is really
# transformed so the two can never drift apart.
probe_values = (0.5, 0.499999)

qt = QuantileTransformer(n_quantiles=10)
qt.fit(X)

# Without smoothing: a behaviour which is not desired, but that frankly
# should not happen, is the following.
for value in probe_values:
    # transform() expects a 2-D array of shape (n_samples, n_features), so
    # each scalar probe is wrapped as a single-sample, single-feature array.
    print('{} is mapped to {}'.format(value, qt.transform(np.array([[value]]))))

# The two values are mapped far from each other since 0.5 will be mapped to
# the greater quantiles. A solution is to add a small noise while computing
# the quantiles, making the operation more stable.

# NOTE(review): `smoothing_noise` appears to be specific to the development
# branch this script was written against -- confirm that the installed
# scikit-learn accepts it.
qt = QuantileTransformer(n_quantiles=10, smoothing_noise=1e-7)
qt.fit(X)

# With smoothing: the same two probes, for comparison with the output above.
for value in probe_values:
    print('{} is mapped to {}'.format(value, qt.transform(np.array([[value]]))))

# However, this case is unlikely to happen in a real-world dataset, and
# that is why we chose None as the default value of the smoothing_noise
# parameter.
	import numpy as np

	from sklearn.preprocessing import QuantileTransformer

	# Demo: effect of `smoothing_noise` on QuantileTransformer when the
	# training data contains a heavily repeated value.
	#
	# Toy single-feature dataset: one 0, seven 0.5 and two 1, reshaped into
	# a column of shape (n_samples, 1).
	X = np.array([0] * 1 + [0.5] * 7 + [1] * 2).reshape(-1, 1)

	# Two almost identical query points: the repeated value itself and a
	# value just below it. The printed label is derived from the value that
	# is really transformed so the two can never drift apart.
	probe_values = (0.5, 0.499999)

	qt = QuantileTransformer(n_quantiles=10)
	qt.fit(X)

	# Without smoothing: a behaviour which is not desired, but that frankly
	# should not happen, is the following.
	for value in probe_values:
		# transform() expects a 2-D array of shape (n_samples, n_features),
		# so each scalar probe is wrapped as a single-sample,
		# single-feature array.
		print('{} is mapped to {}'.format(value, qt.transform(np.array([[value]]))))

	# The two values are mapped far from each other since 0.5 will be mapped
	# to the greater quantiles. A solution is to add a small noise while
	# computing the quantiles, making the operation more stable.

	# NOTE(review): `smoothing_noise` appears to be specific to the
	# development branch this script was written against -- confirm that
	# the installed scikit-learn accepts it.
	qt = QuantileTransformer(n_quantiles=10, smoothing_noise=1e-7)
	qt.fit(X)

	# With smoothing: the same two probes, for comparison with the output
	# above.
	for value in probe_values:
		print('{} is mapped to {}'.format(value, qt.transform(np.array([[value]]))))

	# However, this case is unlikely to happen in a real-world dataset, and
	# that is why we chose None as the default value of the smoothing_noise
	# parameter.