mutaku/feature_scaling.py

## feature_scaling.py
from __future__ import division
import numpy as np
import pandas as pd


def standardization(x, args):
  """Scale ``x`` to zero mean and unit variance (z-score).

  ``args`` is a dict providing the statistics under the keys
  ``'mean'`` and ``'std'``; the result is ``x`` with the mean
  subtracted, divided by the standard deviation.
  """
  centered = x - args['mean']
  return centered / args['std']

def rescaling(x, args):
  """Min-max scale ``x`` relative to a known range.

  ``args`` is a dict providing the range bounds under the keys
  ``'min'`` and ``'max'``; a value equal to the minimum maps to 0
  and a value equal to the maximum maps to 1.
  """
  offset = x - args['min']
  span = args['max'] - args['min']
  return offset / span

def feature_scaling(array, method=standardization):
  """Scale every element of ``array`` with ``method``.

  The statistics the method needs are computed once from the whole
  input and passed to ``method`` in a dict alongside each element:

  * ``rescaling`` receives ``'min'`` and ``'max'``.
  * ``standardization`` receives ``'mean'`` and ``'std'``, taken
    from ``np.mean`` and ``np.std`` of the input.
  * Any other callable receives an empty dict.

  Args:
    array: Iterable of numeric values. It is traversed more than
      once, so it must not be a one-shot iterator.
    method: Callable ``method(x, args)`` returning the scaled value
      of a single element.

  Returns:
    A NumPy array holding the scaled values in input order.
  """
  args = dict()
  if method is rescaling:
    args['max'] = max(array)
    args['min'] = min(array)
  elif method is standardization:
    args['mean'] = np.mean(array)
    args['std'] = np.std(array)

  # Build a real list before handing it to NumPy: on Python 3 ``map``
  # returns a lazy iterator, which ``np.asarray`` would wrap in a 0-d
  # object array instead of producing the scaled values.
  # The asarray call can go away if this is only ever fed from
  # pandas apply.
  return np.asarray([method(x, args) for x in array])

# Example:
# Arrange the parameter vectors (PVs) as a matrix in which every
# column is one parameter vector and every row is one parameter.
# feature_scaling is then applied to each row of that pandas
# matrix.
pv_df = pd.concat((pd.Series(vector) for vector in parameter_vectors), axis=1)
pv_feature_scaled = pv_df.apply(
  feature_scaling,
  axis=1,
  args=(standardization,),
)

# Here is a worked example of the steps described above, using generated data:
In [110]: l_as_df
Out[110]:
       0        1        2
0   0.00  22.2000   52.000
1   1.96  23.9316   53.508
2   4.00  26.6400   57.200
3  11.88  57.1428  118.404

In [111]: pv_feature_scaled = l_as_df.apply(feature_scaling, axis=1, args=(standardization,))

In [112]: pv_feature_scaled
Out[112]:
          0         1         2
0 -1.160952 -0.118912  1.279863
1 -1.160315 -0.120022  1.280337
2 -1.159692 -0.121107  1.280799
3 -1.159083 -0.122168  1.281250

In [113]: l_as_df.apply(feature_scaling, axis=1, args=(rescaling,))
Out[113]:
     0         1    2
0  0.0  0.426923  1.0
1  0.0  0.426236  1.0
2  0.0  0.425564  1.0
3  0.0  0.424907  1.0

## triangular_populate.py
In [20]: m = np.zeros((5, 5))

In [21]: for x in combinations(range(6), 2):
    ...:     m[(x[0], x[1] - 1)] = 3
    ...:

In [22]: m
Out[22]:
array([[ 3.,  3.,  3.,  3.,  3.],
       [ 0.,  3.,  3.,  3.,  3.],
       [ 0.,  0.,  3.,  3.,  3.],
       [ 0.,  0.,  0.,  3.,  3.],
       [ 0.,  0.,  0.,  0.,  3.]])
	from __future__ import division
	import numpy as np
	import pandas as pd


	def standardization(x, args):
		"""Scale ``x`` to zero mean and unit variance (z-score).

		``args`` is a dict providing the statistics under the keys
		``'mean'`` and ``'std'``.
		"""
		return (x - args['mean']) / args['std']

	def rescaling(x, args):
		"""Min-max scale ``x`` relative to a known range.

		``args`` is a dict providing the range bounds under the keys
		``'min'`` and ``'max'``; the minimum maps to 0 and the maximum
		maps to 1.
		"""
		return (x - args['min']) / (args['max'] - args['min'])

	def feature_scaling(array, method=standardization):
		"""Scale every element of ``array`` with ``method``.

		The statistics the method needs are computed once from the
		whole input and passed to ``method`` in a dict alongside each
		element: ``rescaling`` receives ``'min'`` and ``'max'``,
		``standardization`` receives ``'mean'`` and ``'std'`` (from
		``np.mean`` and ``np.std``), and any other callable receives an
		empty dict.

		Args:
		  array: Iterable of numeric values. It is traversed more than
		    once, so it must not be a one-shot iterator.
		  method: Callable ``method(x, args)`` returning the scaled
		    value of a single element.

		Returns:
		  A NumPy array holding the scaled values in input order.
		"""
		args = dict()
		if method is rescaling:
			args['max'] = max(array)
			args['min'] = min(array)
		elif method is standardization:
			args['mean'] = np.mean(array)
			args['std'] = np.std(array)

		# Build a real list before handing it to NumPy: on Python 3
		# ``map`` returns a lazy iterator, which ``np.asarray`` would wrap
		# in a 0-d object array instead of producing the scaled values.
		# The asarray call can go away if this is only ever fed from
		# pandas apply.
		return np.asarray([method(x, args) for x in array])

	# Example:
	# Arrange the parameter vectors (PVs) as a matrix in which every
	# column is one parameter vector and every row is one parameter.
	# feature_scaling is then applied to each row of that pandas
	# matrix.
	pv_df = pd.concat((pd.Series(vector) for vector in parameter_vectors), axis=1)
	pv_feature_scaled = pv_df.apply(
		feature_scaling,
		axis=1,
		args=(standardization,),
	)

	# Here is a worked example of the steps described above, using generated data:
	In [110]: l_as_df
	Out[110]:
	       0        1        2
	0   0.00  22.2000   52.000
	1   1.96  23.9316   53.508
	2   4.00  26.6400   57.200
	3  11.88  57.1428  118.404

	In [111]: pv_feature_scaled = l_as_df.apply(feature_scaling, axis=1, args=(standardization,))

	In [112]: pv_feature_scaled
	Out[112]:
	          0         1         2
	0 -1.160952 -0.118912  1.279863
	1 -1.160315 -0.120022  1.280337
	2 -1.159692 -0.121107  1.280799
	3 -1.159083 -0.122168  1.281250

	In [113]: l_as_df.apply(feature_scaling, axis=1, args=(rescaling,))
	Out[113]:
	     0         1    2
	0  0.0  0.426923  1.0
	1  0.0  0.426236  1.0
	2  0.0  0.425564  1.0
	3  0.0  0.424907  1.0
	In [20]: m = np.zeros((5, 5))

	In [21]: for x in combinations(range(6), 2):
	    ...:     m[(x[0], x[1] - 1)] = 3
	    ...:

	In [22]: m
	Out[22]:
	array([[ 3.,  3.,  3.,  3.,  3.],
	       [ 0.,  3.,  3.,  3.,  3.],
	       [ 0.,  0.,  3.,  3.,  3.],
	       [ 0.,  0.,  0.,  3.,  3.],
	       [ 0.,  0.,  0.,  0.,  3.]])