Skip to content

Instantly share code, notes, and snippets.

@this-is-richard
Last active May 9, 2019 03:46
Show Gist options
  • Save this-is-richard/ad78ec777d63e673b06dfcbda70b4ff9 to your computer and use it in GitHub Desktop.
Save this-is-richard/ad78ec777d63e673b06dfcbda70b4ff9 to your computer and use it in GitHub Desktop.
import math
# Build `corr_tuples`: for every column of `df`, the other column it is most
# strongly correlated with, as (column, partner, abs_correlation) tuples,
# sorted from strongest to weakest, with each unordered pair listed once.
#
# Absolute values are used so that strong negative correlations rank
# alongside strong positive ones.
corr = df.corr()
corr_abs = corr.abs()
corr_tuples = []
n_cols = len(corr_abs.columns)
# get highest corr for each col
for (i, col) in enumerate(corr_abs.columns):
    # Leave out the column's own entry by position instead of assuming it
    # sorts first: a perfectly correlated partner ties with it at 1.0, and
    # a NaN self-correlation would sort last.
    other_rows = [j for j in range(n_cols) if j != i]
    corr_with_col = corr_abs.iloc[other_rows, i]\
        .sort_values(ascending=False)
    if corr_with_col.empty:
        # Single-column frame: there is no partner to report.
        continue
    # Positional access; plain [] on a Series is label-based.
    highest_corr = corr_with_col.iloc[0]
    if math.isnan(highest_corr):
        # sort_values puts NaN last, so a NaN in first place means this
        # column has no defined correlation with any other column.
        continue
    col2 = corr_with_col.index[0]
    corr_tuples.append((col, col2, float(highest_corr)))
# sort by corr
corr_tuples = sorted(corr_tuples, key=lambda x: x[2], reverse=True)
# remove duplicates
# (a, b) and (b, a) describe the same pair. Keying on the unordered pair
# catches mirrored entries even when they are not adjacent, and avoids
# removing items from the list while iterating over it. The dict keeps the
# position of the first occurrence and the value of the last one.
unique_pairs = {}
for tup in corr_tuples:
    unique_pairs[frozenset(tup[:2])] = tup
corr_tuples = list(unique_pairs.values())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment