Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Box-Cox transforms
#power = PowerTransformer(method='box-cox')
def testNormal (x):
k2, p = stats.normaltest(x)
alpha = .001
#print("p = {:g}".format(p))
if p < alpha: # null hypothesis: x comes from a normal distribution
#print(p)
#print(alpha)
print("The null hypothesis can be rejected")
xt, _ = stats.boxcox(x)
print(_)
xt = pd.DataFrame(xt)
return _, pd.DataFrame(xt).set_index(x.index)
else:
print("The null hypothesis cannot be rejected")
return 1, pd.DataFrame(x)
def inverse_boxcox(data, lambdas):
    """Apply the inverse Box-Cox transform to ``data``.

    Args:
        data: Box-Cox transformed values, in any form ``inv_boxcox`` accepts.
        lambdas: Object exposing ``.values`` (e.g. a pandas Series or
            DataFrame) that holds the lambda(s) to invert with.

    Returns:
        Whatever ``inv_boxcox`` returns for ``data`` and the raw lambda array.
    """
    # Strip the pandas wrapper so inv_boxcox receives a plain array of lambdas.
    lambda_values = lambdas.values
    restored = inv_boxcox(data, lambda_values)
    return restored
def transform_boxcox(data):
    """Run ``testNormal`` on every column of ``data`` and collect the results.

    Args:
        data: pandas DataFrame whose columns are processed one at a time, in
            positional order.

    Returns:
        A ``(transformed, transformed_lambdas)`` tuple.

        ``transformed`` has the same column labels as ``data``; each column is
        whatever ``testNormal`` returned for it (Box-Cox transformed, or the
        original values when normality was not rejected).

        ``transformed_lambdas`` is a one-column DataFrame with one row per
        column of ``data``, in column order.  The per-column pieces are
        stacked without re-indexing, so every row carries the label ``0``;
        address rows by position (``.iloc``).

        Both results are empty DataFrames when ``data`` has no columns.
    """
    scaled_columns = []
    lambda_rows = []
    for i in range(data.shape[1]):
        lmbda, scaled = testNormal(data.iloc[:, i])
        # testNormal already indexes its result like the input column, so no
        # re-indexing is needed here (the old set_index call discarded its
        # return value and had no effect).
        scaled_columns.append(scaled)
        lambda_rows.append(pd.DataFrame(pd.Series(lmbda)))

    if not scaled_columns:
        # pd.concat refuses an empty list; mirror the empty-input result.
        transformed = pd.DataFrame()
        transformed.columns = data.columns
        return transformed, pd.DataFrame()

    # One concat per result instead of re-copying the accumulated frame on
    # every iteration.
    transformed = pd.concat(scaled_columns, axis=1)
    transformed.columns = data.columns
    transformed_lambdas = pd.concat(lambda_rows, axis=0)
    return transformed, transformed_lambdas
def revert_boxcox(data, lambdas):
    """Undo a column-wise Box-Cox transform.

    Args:
        data: pandas DataFrame of (possibly) transformed columns.
        lambdas: One lambda per column of ``data``, matched by position.  May
            be a one-column DataFrame (as returned by ``transform_boxcox``),
            a Series, or a plain sequence.  A lambda equal to ``1`` marks a
            column that was left untransformed (the marker ``testNormal``
            returns); such columns are copied through unchanged.

    Returns:
        DataFrame with the same index and column labels as ``data`` in which
        every column whose lambda is not ``1`` has been passed through
        ``inv_boxcox``.

    Raises:
        IndexError: If ``lambdas`` holds fewer values than ``data`` has
            columns.
    """
    # Flatten to a 1-D array so DataFrame, Series and list inputs all work and
    # each lambda is a scalar rather than a one-element array.
    lambda_values = pd.DataFrame(lambdas).values.ravel()

    restored = []
    for i in range(data.shape[1]):
        column = data.iloc[:, i]
        lmbda = lambda_values[i]
        if lmbda == 1:
            restored.append(column)
        else:
            # inv_boxcox returns a bare ndarray; re-attach the row labels.
            restored.append(
                pd.Series(inv_boxcox(column.values, lmbda), index=data.index)
            )

    # pd.concat refuses an empty list, so an input without columns yields an
    # empty frame directly.
    reverted = pd.concat(restored, axis=1) if restored else pd.DataFrame()
    reverted.columns = data.columns
    return reverted
# --- Driver: transform the states table, plot it, save it, then invert it ---

# Load the raw table.  Rows are labelled by the first column, which also
# stays in place as column 0.
all_data = pd.read_csv('/mnt/distvol/states.csv')
all_data.index = all_data.iloc[:, 0]

# Every column after the first goes through the per-column normality check
# and, where normality is rejected, the Box-Cox transform.
transformed, lambdas = transform_boxcox(all_data.iloc[:, 1:])
transformed.columns = all_data.columns[1:]

# Histograms before and after the transform.
# NOTE(review): the "before" plot starts at column 2 while the transform
# starts at column 1 -- confirm the first transformed column is skipped here
# on purpose.
all_data.iloc[:, 2:].hist()
transformed.hist()

# Persist the transformed table.
transformed.to_csv("/mnt/distvol/transformed.csv")

# Map the transformed values back using the lambdas collected above.
reverted = revert_boxcox(transformed, lambdas)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment