Skip to content

Instantly share code, notes, and snippets.

@mutaku
Last active March 15, 2017 16:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mutaku/648b5971c0f981002f4f227271d84810 to your computer and use it in GitHub Desktop.
Save mutaku/648b5971c0f981002f4f227271d84810 to your computer and use it in GitHub Desktop.
from __future__ import division
import numpy as np
import pandas as pd
def standardization(x, args):
    """Zero mean and unit variance scaling.

    Parameters
    ----------
    x : number or array-like
        Value(s) to scale.
    args : dict
        Must contain the keys ``'mean'`` and ``'std'``.

    Returns
    -------
    ``(x - args['mean']) / args['std']``. If ``args['std']`` is a scalar
    equal to zero (a constant feature), the divisor is treated as 1, so
    the result is ``x - args['mean']`` rather than a division by zero.
    """
    spread = args['std']
    # A constant feature has zero spread. Dividing by it would raise
    # ZeroDivisionError for Python numbers or yield NaN/inf for numpy
    # scalars, so fall back to a unit divisor. Non-scalar divisors are
    # left untouched.
    if np.ndim(spread) == 0 and spread == 0:
        spread = 1
    return (x - args['mean']) / spread
def rescaling(x, args):
    """Min-max scaling onto the [0, 1] interval.

    Parameters
    ----------
    x : number or array-like
        Value(s) to scale.
    args : dict
        Must contain the keys ``'min'`` and ``'max'``.

    Returns
    -------
    ``(x - args['min']) / (args['max'] - args['min'])``. If the range
    ``max - min`` is a scalar equal to zero (a constant feature), the
    divisor is treated as 1, so the result is ``x - args['min']`` rather
    than a division by zero.
    """
    lowest = args['min']
    span = args['max'] - lowest
    # A constant feature has a zero-width range. Dividing by it would
    # raise ZeroDivisionError for Python numbers or yield NaN/inf for
    # numpy scalars, so fall back to a unit divisor. Non-scalar ranges
    # are left untouched.
    if np.ndim(span) == 0 and span == 0:
        span = 1
    return (x - lowest) / span
def feature_scaling(array, method=standardization):
    """Perform feature scaling for the given array using ``method``.

    Parameters
    ----------
    array : array-like
        Values of a single feature (e.g. one row handed over by
        ``DataFrame.apply``).
    method : callable, optional
        Scaling function called as ``method(x, args)`` for every element
        ``x``. For ``rescaling`` the ``args`` dict carries the ``'max'``
        and ``'min'`` of ``array``; for ``standardization`` (the default)
        it carries the ``'mean'`` and ``'std'`` as computed by
        ``np.mean`` / ``np.std`` with their default settings. Any other
        callable receives an empty dict.

    Returns
    -------
    numpy.ndarray
        The scaled values, one per element of ``array``.
    """
    # Materialise the input once so the statistics and the per-element
    # scaling below operate on the same concrete sequence.
    values = np.asarray(array)
    args = dict()
    if method is rescaling:
        args['max'] = values.max()
        args['min'] = values.min()
    elif method is standardization:
        args['mean'] = np.mean(values)
        args['std'] = np.std(values)
    # Build a real list before handing it to numpy: on Python 3 ``map``
    # is lazy, and ``np.asarray(map(...))`` would wrap the map object in
    # a 0-d object array instead of producing the scaled values.
    # The asarray wrapper could be dropped if this were only ever fed
    # from pandas apply.
    return np.asarray([method(x, args) for x in values])
# Example usage:
# Assemble the parameter vectors (PVs) into a DataFrame in which every
# column is one parameter vector, so that each row collects the values
# of a single parameter across all vectors.
# feature_scaling is then applied row by row, i.e. once per parameter.
pv_df = pd.concat(
    (pd.Series(vector) for vector in parameter_vectors),
    axis=1,
)
pv_feature_scaled = pv_df.apply(
    feature_scaling,
    axis=1,
    args=(standardization,),
)
# Here is a worked example of the procedure described above, using generated data:
In [110]: l_as_df
Out[110]:
0 1 2
0 0.00 22.2000 52.000
1 1.96 23.9316 53.508
2 4.00 26.6400 57.200
3 11.88 57.1428 118.404
In [111]: pv_feature_scaled = l_as_df.apply(feature_scaling, axis=1, args=(standardization,))
In [112]: pv_feature_scaled
Out[112]:
0 1 2
0 -1.160952 -0.118912 1.279863
1 -1.160315 -0.120022 1.280337
2 -1.159692 -0.121107 1.280799
3 -1.159083 -0.122168 1.281250
In [113]: l_as_df.apply(feature_scaling, axis=1, args=(rescaling,))
Out[113]:
0 1 2
0 0.0 0.426923 1.0
1 0.0 0.426236 1.0
2 0.0 0.425564 1.0
3 0.0 0.424907 1.0
In [20]: m = np.zeros((5, 5))
In [21]: for x in combinations(range(6), 2):
...: m[(x[0], x[1] - 1)] = 3
...:
In [22]: m
Out[22]:
array([[ 3., 3., 3., 3., 3.],
[ 0., 3., 3., 3., 3.],
[ 0., 0., 3., 3., 3.],
[ 0., 0., 0., 3., 3.],
[ 0., 0., 0., 0., 3.]])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment