finlytics-hub/variancethreshold_string.py

## variancethreshold_string.py
# import the required library
from sklearn.feature_selection import VarianceThreshold

# NOTE: this snippet does not define `np`, `pd` or `X_train`; they must already be in scope
# (numpy as np, pandas as pd, and the training DataFrame).

# Split the training DataFrame once, instead of re-running select_dtypes for every step below
numeric_features = X_train.select_dtypes(include=[np.number])
non_numeric_features = X_train.select_dtypes(exclude=[np.number])

# define the transform.
# threshold can be set to any float value. With the default of 0 only constant (zero-variance)
# columns are dropped; raise it to also drop low-variance columns
selector = VarianceThreshold(threshold=0)

# fit the defined transform on training dataset's numerical columns only
selector.fit(numeric_features)

# Use the fitted selector's boolean support mask to pick the surviving column labels.
# Selecting by label from the original DF keeps a labelled DataFrame, so a separate
# selector.transform() pass (whose result was previously discarded) is not needed here.
# Don't forget to keep the same columns in the test dataset as well.
kept_columns = numeric_features.columns[selector.get_support(indices=False)]
X_train_clean = X_train[kept_columns]

# Concatenate the non-numerical columns at the end of the clean DF - a small distraction if you care about the column sorting
X_train_clean = pd.concat([X_train_clean, non_numeric_features], axis=1)
	# NOTE(review): this tab-indented block repeats the same steps as the unindented snippet
	# above it and looks like a paste artifact - confirm and keep only one copy.
	# import the required library
	from sklearn.feature_selection import VarianceThreshold

	# NOTE: `np`, `pd` and `X_train` are not defined in this snippet and must already be in scope.

	# Compute the numerical / non-numerical split a single time and reuse it below
	numeric_part = X_train.select_dtypes(include=[np.number])
	non_numeric_part = X_train.select_dtypes(exclude=[np.number])

	# define the transform.
	# threshold can be set to any float value. The default of 0 removes only constant
	# (zero-variance) columns; a larger value also removes low-variance columns
	selector = VarianceThreshold(threshold=0)

	# fit the defined transform on training dataset's numerical columns
	selector.fit(numeric_part)

	# Extract the non-low-variance columns from the original DF into a new DF.
	# The boolean mask from get_support() is applied to the column labels, so the result
	# stays a labelled DataFrame; the earlier selector.transform() call only produced a
	# value that was thrown away and has been dropped.
	# Remember to keep the same columns in the test dataset as well.
	surviving_columns = numeric_part.columns[selector.get_support(indices=False)]
	X_train_clean = X_train[surviving_columns]

	# Concatenate the non-numerical columns at the end of the clean DF - a small distraction if you care about the column sorting
	X_train_clean = pd.concat([X_train_clean, non_numeric_part], axis=1)