Skip to content

Instantly share code, notes, and snippets.

@mutaku
Created November 8, 2016 17:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mutaku/619952deba8c1c64402bd61b64f87dd9 to your computer and use it in GitHub Desktop.
Save mutaku/619952deba8c1c64402bd61b64f87dd9 to your computer and use it in GitHub Desktop.
# Compare the members of every multi-member cluster of a single run.
#
# For each such cluster and each component p of the per-row `vector`, compute
# for every member the sum of squared differences between its component p and
# that of every member of the cluster, then summarise the spread of those sums
# with their coefficient of variation (std / mean).
#
# Result: `cluster_compare`, one row per (cluster, component) pair with columns
#   cluster - cluster label of the group
#   sums    - list with one sum of squared differences per cluster member
#   cv      - np.std(sums) / np.mean(sums)
RUN = 1  # run under inspection; every lookup below uses this same run

grouped = df.groupby(['run', 'cluster'])
run_clusters = df.loc[df.run == RUN, 'cluster']
# Cluster labels are treated as 0-based, so the highest label itself has to be
# part of the range (hence the +1). An empty run yields no clusters at all.
n_clusters = int(run_clusters.max()) + 1 if len(run_clusters) else 0

index_clusters, index_params, rows = [], [], []
for cluster_id in range(n_clusters):
    key = (RUN, cluster_id)
    if key not in grouped.groups:
        # Gaps in the label sequence are skipped instead of raising KeyError.
        continue
    group = grouped.get_group(key)
    if len(group) <= 1:
        # A single member has nothing to be compared against.
        continue
    vectors = list(group.vector)
    for p in range(len(vectors[0])):
        # Extract component p once rather than re-indexing the frame inside
        # the O(n^2) pairwise loop.
        values = [vec[p] for vec in vectors]
        sums = [sum((a - b) ** 2 for b in values) for a in values]
        index_clusters.append(cluster_id)
        index_params.append(p)
        # NOTE: cv is nan when all members share the same component value,
        # because both the std and the mean of `sums` are then 0.
        rows.append((group.cluster.iloc[0],
                     sums,
                     np.std(sums) / np.mean(sums)))

# Rows are keyed by (cluster, component) so that results of different clusters
# are kept side by side instead of overwriting each other.
cluster_compare = pd.DataFrame(
    rows,
    columns=['cluster', 'sums', 'cv'],
    index=pd.MultiIndex.from_arrays([index_clusters, index_params],
                                    names=['cluster_id', 'parameter']))
print(cluster_compare.describe())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment