import numpy as np
from sklearn.model_selection import train_test_split  # sklearn.cross_validation in scikit-learn < 0.18
from sklearn import linear_model
# Create synthetic data
num_samples = 500
num_unrelated_features = 1000
def f(x1, x2):
    # The true signal depends only on the first two features
    return x1 * 3 + x2 * 5

def binarize(y, thres):
    # Turn the continuous target into a binary label
    return 1 if y > thres else 0
# Two informative features plus many irrelevant noise features
unrelated_features = np.random.rand(num_samples, num_unrelated_features)
fea1 = np.random.rand(num_samples, 1)
fea2 = np.random.rand(num_samples, 1)
data = np.concatenate((fea1, fea2, unrelated_features), axis=1)
y = f(fea1, fea2)
binarize_func = np.vectorize(binarize)
y = binarize_func(y, 4)
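# A quick sanity check (not in the original gist): 3*x1 + 5*x2 > 4 holds on about
# half of the unit square, so the two classes should be roughly balanced.
print("positive fraction: %.3f" % y.mean())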
# Train and test
data_train, data_test, y_train, y_test = train_test_split(data, y, test_size=0.2)
# L1-penalized logistic regression; liblinear is used because it supports the l1 penalty
logreg_l1 = linear_model.LogisticRegression(penalty='l1', C=1, solver='liblinear')
logreg_l1.fit(data_train, y_train.ravel())
l1_score = logreg_l1.score(data_test, y_test)
print("l1 score: %f" % l1_score)
# L2-penalized logistic regression with the same regularization strength and solver
logreg_l2 = linear_model.LogisticRegression(penalty='l2', C=1, solver='liblinear')
logreg_l2.fit(data_train, y_train.ravel())
l2_score = logreg_l2.score(data_test, y_test)
print("l2 score: %f" % l2_score)