# mutaku/intra_cluster_parameter_variation.py

## intra_cluster_parameter_variation.py
# Intra-cluster parameter variation for the clusters of a single run.
#
# For every cluster of ``run_id`` that has more than one member, and for
# every parameter position p in the members' vectors, compute for each
# member i
#     sums[i] = sum_j (vector_i[p] - vector_j[p]) ** 2
# and summarise the spread of those sums with the coefficient of variation
# (std / mean).  One row per (cluster, parameter) pair is stored in
# ``cluster_compare``; rows are ordered by cluster, then by parameter.
#
# NOTE(review): the cluster filter previously used run 1 while the group
# lookup used run 8; both now use ``run_id``.  Confirm 1 is the intended run.
run_id = 1
grouped = df.groupby(['run', 'cluster'])
run_clusters = df.loc[df.run == run_id, 'cluster']

# Rows are collected here and turned into a frame once at the end, so each
# (cluster, parameter) pair keeps its own row instead of later clusters
# overwriting rows labelled with the same parameter index.
rows = []

# max() is itself a valid label, hence the + 1; an empty run has no labels.
cluster_ids = range(int(run_clusters.max()) + 1) if len(run_clusters) else []
for cluster_id in cluster_ids:
    key = (run_id, cluster_id)
    if key not in grouped.groups:
        continue  # labels are not guaranteed to be contiguous
    group = grouped.get_group(key)
    if len(group) <= 1:
        continue  # a single member has nothing to be compared with

    vectors = list(group.vector)
    # assumes every vector has at least as many entries as the first one
    for p in range(len(vectors[0])):
        column = np.array([vec[p] for vec in vectors], dtype=float)
        # Broadcasting yields the full member-by-member difference matrix.
        diffs = column[:, np.newaxis] - column[np.newaxis, :]
        sums = (diffs ** 2).sum(axis=1).tolist()
        mean = np.mean(sums)
        # Identical values give mean == 0: report NaN without dividing.
        cv = np.std(sums) / mean if mean else np.nan
        rows.append((group.cluster.iloc[0], sums, cv))

cluster_compare = pd.DataFrame(rows, columns=['cluster', 'sums', 'cv'])
print(cluster_compare.describe())
	# Intra-cluster parameter variation for the clusters of a single run.
#
# NOTE(review): this section repeats the analysis that precedes it in this
# file (presumably a paste artifact) -- confirm whether it can be dropped.
#
# For every cluster of ``run_id`` that has more than one member, and for
# every parameter position p in the members' vectors, compute for each
# member i
#     sums[i] = sum_j (vector_i[p] - vector_j[p]) ** 2
# and summarise the spread of those sums with the coefficient of variation
# (std / mean).  One row per (cluster, parameter) pair is stored in
# ``cluster_compare``; rows are ordered by cluster, then by parameter.
#
# NOTE(review): the cluster filter previously used run 1 while the group
# lookup used run 8; both now use ``run_id``.  Confirm 1 is the intended run.
run_id = 1
grouped = df.groupby(['run', 'cluster'])
run_clusters = df.loc[df.run == run_id, 'cluster']

# Rows are collected here and turned into a frame once at the end, so each
# (cluster, parameter) pair keeps its own row instead of later clusters
# overwriting rows labelled with the same parameter index.
rows = []

# max() is itself a valid label, hence the + 1; an empty run has no labels.
cluster_ids = range(int(run_clusters.max()) + 1) if len(run_clusters) else []
for cluster_id in cluster_ids:
    key = (run_id, cluster_id)
    if key not in grouped.groups:
        continue  # labels are not guaranteed to be contiguous
    group = grouped.get_group(key)
    if len(group) <= 1:
        continue  # a single member has nothing to be compared with

    vectors = list(group.vector)
    # assumes every vector has at least as many entries as the first one
    for p in range(len(vectors[0])):
        column = np.array([vec[p] for vec in vectors], dtype=float)
        # Broadcasting yields the full member-by-member difference matrix.
        diffs = column[:, np.newaxis] - column[np.newaxis, :]
        sums = (diffs ** 2).sum(axis=1).tolist()
        mean = np.mean(sums)
        # Identical values give mean == 0: report NaN without dividing.
        cv = np.std(sums) / mean if mean else np.nan
        rows.append((group.cluster.iloc[0], sums, cv))

cluster_compare = pd.DataFrame(rows, columns=['cluster', 'sums', 'cv'])
print(cluster_compare.describe())