Skip to content

Instantly share code, notes, and snippets.

@danyashorokh
Last active October 16, 2017 15:13
Show Gist options
  • Save danyashorokh/bc246a208d425c8f70c3ce0253d868e6 to your computer and use it in GitHub Desktop.
Save danyashorokh/bc246a208d425c8f70c3ce0253d868e6 to your computer and use it in GitHub Desktop.
[Python] Features correlation
import pandas as pd
# Print wide DataFrames on a single line instead of wrapping columns across several blocks.
pd.set_option('expand_frame_repr', False)
def calc_corr(A: pd.Series, B: pd.Series) -> str:
    """Return the correlation of *A* with *B* as a two-decimal string.

    Args:
        A: Series whose ``corr`` method is called.
        B: Series passed to ``A.corr``.

    Returns:
        The value of ``A.corr(B)`` formatted with exactly two decimal
        places (e.g. ``"0.50"``). Callers that need a number must convert
        the string back to ``float``.
    """
    return "{:0.2f}".format(A.corr(B))
# Load the input table; the first column is skipped and every remaining
# column is treated as a feature to correlate.
df = pd.read_excel('input.xlsx')
print(df.head())

caths = list(df.columns[1:])
print(len(caths))

# Keep only the integer part before the first '.' of every feature cell.
# str() makes this work both for text cells and for cells that read_excel
# has already parsed as numbers (which have no .split method).
df[caths] = df[caths].applymap(lambda x: int(str(x).split('.')[0]))
print(df.head())

# Build the square correlation matrix row by row. The diagonal is set to 0
# rather than the self-correlation so it does not dominate the colour scale.
rlst = []
for i, row_name in enumerate(caths):
    clst = []
    for j, col_name in enumerate(caths):
        if i != j:
            clst.append(calc_corr(df[row_name], df[col_name]))
        else:
            clst.append(0)
        # Progress output: one line per computed cell.
        print(i, j, row_name, col_name)
    rlst.append(clst)

# calc_corr returns formatted strings, so convert the whole table to float.
corrs = pd.DataFrame(rlst, index=caths, columns=caths).astype(float)

# The context manager saves and closes the workbook exactly once on exit
# (ExcelWriter.save() no longer exists in pandas 2.x).
with pd.ExcelWriter("corrs.xlsx", engine="xlsxwriter") as writer:
    corrs.to_excel(writer, sheet_name='Sheet1')
    worksheet = writer.sheets['Sheet1']
    # Row 0 holds the header and column 0 the index, so the numeric cells
    # occupy rows/columns 1..len(caths).
    worksheet.conditional_format(1, 1, len(caths), len(caths), {'type': '3_color_scale'})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment