# smazzanti/mrmr_fcq.py

## mrmr_fcq.py
import pandas as pd
from sklearn.feature_selection import f_regression

# MRMR (Minimum Redundancy - Maximum Relevance) feature selection, FCQ variant:
# relevance is the F-statistic of a feature against the target, redundancy is
# the mean absolute correlation of that feature with the features already
# selected, and the score is their quotient.
#
# inputs (must already be defined when this script runs):
#    X: pandas.DataFrame, features
#    y: pandas.Series, target variable
#    K: number of features to select
#
# output:
#    selected: list of column labels of X, in the order they were picked

# floor applied to correlations (to avoid division by zero)
MIN_CORR = .00001

# compute F-statistics and correlations
F = pd.Series(f_regression(X, y)[0], index=X.columns)
corr = X.corr().abs().clip(MIN_CORR)

# initialize list of selected features and list of excluded features
selected = []
not_selected = list(X.columns)

# repeat K times (but never more often than there are features: once
# not_selected is empty the score Series is empty and argmax() raises):
# compute FCQ score for all the features that are currently excluded,
# then find the best one, add it to selected, and remove it from not_selected
for i in range(min(K, len(not_selected))):

    # mean absolute correlation of each excluded feature with the selected ones;
    # on the first pass `selected` is empty, so the mean is NaN and fillna()
    # makes the ranking depend on the F-statistic alone
    redundancy = corr.loc[not_selected, selected].mean(axis=1).fillna(MIN_CORR)

    # compute FCQ score for all the (currently) excluded features (this is Formula 2)
    score = F.loc[not_selected] / redundancy

    # find best feature, add it to selected and remove it from not_selected
    best = score.index[score.argmax()]
    selected.append(best)
    not_selected.remove(best)