Skip to content

Instantly share code, notes, and snippets.

@thistleknot
Last active January 30, 2022 20:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save thistleknot/13e8630ed9a50359c08b301c53f38ec6 to your computer and use it in GitHub Desktop.
Save thistleknot/13e8630ed9a50359c08b301c53f38ec6 to your computer and use it in GitHub Desktop.
Laferriere Transform
#Laferriere Transform
# NOTE: this banner is a top-level statement placed before the function
# definition, so it is printed once when the script/cell is executed, not on
# each call to laferriere_transform.
print("scatterplot of transformed values w Z scores")
def laferriere_transform(data, method):
    """Rescale every column of *data* onto a 0-1 index that pivots at 0.5.

    Each column is first centred and scaled (see ``method``). The scaled
    values are then split at zero: negative values are mapped into
    ``[0, 0.5)`` and non-negative values into ``[0.5, 1]`` using normalised
    cumulative sums, so the position of a row reflects how much of that
    side's total deviation has been accumulated up to it.

    As a side effect, one scatter series per column (transformed index on
    the x axis, scaled value on the y axis) is drawn with ``plt.scatter``
    and the figure is displayed once with ``plt.show()`` after all columns
    have been processed.

    Args:
        data: DataFrame of numeric columns.
        method: ``'mean'`` to standardise with ``StandardScaler`` (z-scores),
            or ``'median'`` to centre on the column median and divide by the
            median absolute deviation.

    Returns:
        A DataFrame with one transformed column per column of *data*,
        joined on the index labels of *data*.

    Raises:
        ValueError: if *method* is neither ``'mean'`` nor ``'median'``.
    """
    scaled = _laferriere_scale(data, method)

    result = pd.DataFrame()
    for col in data.columns:
        scaled_col = scaled[[col]]
        index_col = _laferriere_index(scaled_col, col)
        # axis=1 joins on index labels, so each column lands on the right row
        # even though index_col is ordered by value rather than by position.
        result = pd.concat([result, index_col], axis=1)
        # Both frames carry the same column label, hence positional access.
        paired = pd.concat([index_col, scaled_col], axis=1)
        plt.scatter(paired.iloc[:, 0], paired.iloc[:, 1])
    plt.show()
    return result


def _laferriere_scale(data, method):
    """Return *data* centred and scaled column-wise according to *method*."""
    if method == 'mean':
        scaled = StandardScaler().fit_transform(data)
    elif method == 'median':
        # scipy's default is the raw (unnormalised) MAD along axis 0.
        scaled = (data - np.median(data, axis=0)) / stats.median_abs_deviation(data)
    else:
        # Previously an unknown method fell through both branches and an
        # empty DataFrame was returned without any indication of the typo.
        raise ValueError(
            "method must be 'mean' or 'median', got {!r}".format(method)
        )
    return pd.DataFrame(scaled, columns=data.columns).set_index(data.index)


def _laferriere_index(scaled_col, col):
    """Map one single-column frame of scaled values onto the 0-1 index."""
    # Negative side: accumulate starting from the value closest to zero.
    desc_sorted = scaled_col.sort_values(kind="quicksort", by=col, ascending=False)
    lower = desc_sorted[desc_sorted < 0].cumsum().dropna()
    lower = lower / lower.min()  # fraction of the total negative sum, in (0, 1]
    # Equivalent to (1 - fraction) / 2: the most negative row maps to 0 and
    # rows just below zero approach 0.5.
    lower = abs((lower - .5) - .5) / 2

    # Non-negative side: accumulate upward from zero.
    asc_sorted = scaled_col.sort_values(kind="quicksort", by=col, ascending=True)
    upper = asc_sorted[asc_sorted >= 0].cumsum().dropna()
    upper = upper / upper.max() / 2 + .5  # largest row maps to 1

    combined = pd.concat([lower, upper], axis=0)
    combined.columns = [col]
    return combined.sort_values(kind="quicksort", by=col, ascending=True)
@thistleknot
Copy link
Author

image

@thistleknot
Copy link
Author

median
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment