# Sharon Woo sharonwoo

## hashing_perceptron.py
# Original code from tinrtgu on Kaggle under WTFPL license
# Relicensed to BSD 3-clause (it does say do what you want...)
# Authors: Kyle Kastner
# License: BSD 3-clause

# Reference links:
# Adaptive learning: http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41159.pdf
# Criteo scalable response prediction: http://people.csail.mit.edu/romer/papers/TISTRespPredAds.pdf
# Vowpal Wabbit (hashing trick): https://github.com/JohnLangford/vowpal_wabbit/
# Hashing Trick: http://arxiv.org/pdf/0902.2206.pdf

## outlier.py
# Load the train/test objects stored in READY.DAT (56 cols).
from dataload import load_object
x_train, x_test, y_train, y_test = load_object('ready.dat')
# Maximum of the training features along axis 0.
# NOTE(review): the result is neither bound nor printed, so this line only
# shows anything when the script is pasted into an interactive session.
x_train.max(axis=0)
del x_test, y_test      # we don't need the test set for now

# Under-sample the big classes to make the set manageable.
# `sampling_strategy` / `fit_resample` are the current imbalanced-learn names
# for the former `ratio` / `fit_sample`; the old spellings were deprecated in
# imbalanced-learn 0.4 and are rejected by later releases.
from imblearn.under_sampling import RandomUnderSampler
rus = RandomUnderSampler(
    # Requested number of samples to keep for each listed class.
    sampling_strategy={'normal': 50000, 'dos': 50000},
    # Fixed seed so the random selection is repeatable.
    random_state=4129,
)
# Only the `attack_type` labels are passed as the target, so `y_train` is
# rebound to the resampled `attack_type` values from here on.
x_train, y_train = rus.fit_resample(x_train, y_train.attack_type)
	# Original code from tinrtgu on Kaggle under WTFPL license
	# Relicensed to BSD 3-clause (it does say do what you want...)
	# Authors: Kyle Kastner
	# License: BSD 3-clause

	# Reference links:
	# Adaptive learning: http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41159.pdf
	# Criteo scalable response prediction: http://people.csail.mit.edu/romer/papers/TISTRespPredAds.pdf
	# Vowpal Wabbit (hashing trick): https://github.com/JohnLangford/vowpal_wabbit/
	# Hashing Trick: http://arxiv.org/pdf/0902.2206.pdf
	# Load the train/test objects stored in READY.DAT (56 cols).
	from dataload import load_object
	x_train, x_test, y_train, y_test = load_object('ready.dat')
	# Maximum of the training features along axis 0.
	# NOTE(review): the result is neither bound nor printed, so this line only
	# shows anything when the script is pasted into an interactive session.
	x_train.max(axis=0)
	del x_test, y_test # we don't need the test set for now

	# Under-sample the big classes to make the set manageable.
	# `sampling_strategy` / `fit_resample` are the current imbalanced-learn names
	# for the former `ratio` / `fit_sample`; the old spellings were deprecated in
	# imbalanced-learn 0.4 and are rejected by later releases.
	from imblearn.under_sampling import RandomUnderSampler
	rus = RandomUnderSampler(
		# Requested number of samples to keep for each listed class.
		sampling_strategy={'normal': 50000, 'dos': 50000},
		# Fixed seed so the random selection is repeatable.
		random_state=4129,
	)
	# Only the `attack_type` labels are passed as the target, so `y_train` is
	# rebound to the resampled `attack_type` values from here on.
	x_train, y_train = rus.fit_resample(x_train, y_train.attack_type)