Skip to content

Instantly share code, notes, and snippets.

@nmningmei
Last active September 28, 2021 12:54
Shows that scipy.spatial.distance.pdist produces the same RDM with the 'correlation' metric whether or not the data have been normalized (row-centered).
import numpy as np
from matplotlib import pyplot as plt
from scipy.spatial import distance
# Simulated data: 36 words (rows) by 100 voxels (columns).
# Start from uniform noise and remove each row's mean, so every word pattern
# is centered at zero.
zero_mean_data = np.random.rand(36, 100)
zero_mean_data -= zero_mean_data.mean(axis=1, keepdims=True)

# Same patterns, but row i is shifted upward by i, so every word pattern has
# a different mean (0, 1, ..., 35) and the data are no longer zero-centered.
sim_data = zero_mean_data + np.arange(36)[:, np.newaxis]

# "Normalize" the shifted data by subtracting each row's own mean again.
sim_data_normalized = sim_data - sim_data.mean(axis=1, keepdims=True)

# One condensed RDM (correlation distance for every pair of rows) per
# version of the data: a = zero-centered, b = shifted, c = re-centered.
RDM_a, RDM_b, RDM_c = (
    distance.pdist(patterns, metric="correlation")
    for patterns in (zero_mean_data, sim_data, sim_data_normalized)
)
# Visualize: one histogram per RDM, stacked vertically. The x and y axes are
# shared so the three panels are drawn on the same scale and can be compared
# directly.
fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True)
for ax, RDM, color, label in zip(
    axes.flatten(),
    [RDM_a, RDM_b, RDM_c],
    ['blue', 'red', 'green'],
    ['zero-centered, original data', 'not zero-centered', 'normalized'],
):
    ax.hist(RDM, color=color, label=label)
    ax.legend(loc='upper right')
# The three histograms should look identical: the 'correlation' metric
# subtracts each row's mean internally, so shifting or re-centering the rows
# does not change the pairwise distances.
# Display the figure explicitly so it also appears when run as a plain script.
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment