seahrh/drop_corr_features.py

## drop_corr_features.py
# Pick one feature to drop from every pair of highly correlated features.
#
# Relies on three names defined elsewhere: `corr_matrix` (indexed positionally
# as a square table whose row labels and column labels are feature names),
# `X` (indexed by feature label) and `y` (the target).
# NOTE(review): presumably corr_matrix is X.corr() or its absolute value --
# confirm. Only values >= the threshold are flagged, so if the matrix is
# signed, strongly *negative* correlations are never considered.
_CORR_THRESHOLD = 0.98

to_drop = []

# |correlation with the target| per feature label, filled lazily so that the
# concat + corr work is done at most once per feature rather than once per
# flagged pair.
_target_corr = {}

# Start at the second row: row 0 has no cells below the diagonal.
for i in range(1, len(corr_matrix)):
    # Walk only the cells strictly under the diagonal, so each pair of
    # features is examined once and self-correlations are skipped.
    for j in range(i):
        if corr_matrix.iloc[i, j] >= _CORR_THRESHOLD:
            row_feature = corr_matrix.index[i]
            col_feature = corr_matrix.columns[j]

            for feature in (row_feature, col_feature):
                if feature not in _target_corr:
                    # `.iloc[0, 1]` is a purely positional lookup of the
                    # feature-vs-target cell; chained `.iloc[0][1]` would
                    # treat `1` as a label whenever the labels are integers.
                    _target_corr[feature] = abs(
                        pd.concat([X[feature], y], axis=1).corr().iloc[0, 1]
                    )

            # Keep whichever of the two correlates more strongly with the
            # target; on a tie (or NaN) the row feature is the one dropped.
            if _target_corr[row_feature] > _target_corr[col_feature]:
                to_drop.append(col_feature)
            else:
                to_drop.append(row_feature)

# De-duplicate while keeping first-seen order, so the result is deterministic.
to_drop = list(dict.fromkeys(to_drop))
	to_drop = list()

	# Iterating over rows starting from the second one, because position [0, 0] will be self-correlation which is 1
	for i in range(1, len(corr_matrix)):
	# Iterating over columns of the row. Only going under the diagonal.
	for j in range(i):
	# See if the correlation between two features are more than a selected threshold
	if corr_matrix.iloc[i, j] >= 0.98:
	# Then keep the one from thos two which correlates with target better
	if abs(pd.concat([X[corr_matrix.index[i]], y], axis=1).corr().iloc[0][1]) > abs(pd.concat([X[corr_matrix.columns[j]], y], axis=1).corr().iloc[0][1]):
	to_drop.append(corr_matrix.columns[j])
	else:
	to_drop.append(corr_matrix.index[i])

	to_drop = list(set(to_drop))