Skip to content

Instantly share code, notes, and snippets.

@fcostin
Created February 23, 2012 14:43
Show Gist options
  • Save fcostin/1893136 to your computer and use it in GitHub Desktop.
Save fcostin/1893136 to your computer and use it in GitHub Desktop.
test_ridge_crime.py
# Source of the UCI "communities" data file and the local name it is saved under.
CRIME_DATA_URL := http://archive.ics.uci.edu/ml/machine-learning-databases/communities/communities.data
CRIME_DATA := crime.data

# `test` names an action, not a file, so always run it when requested.
.PHONY: test

# Time one run of the ridge-regression script against the local data file.
test: $(CRIME_DATA)
	time python test_ridge_crime.py $^

# Download the data file; with no prerequisites this only runs when the
# file does not exist yet.
$(CRIME_DATA):
	wget -O $@ $(CRIME_DATA_URL)
import csv
import sys
import numpy
# Seed NumPy's global random number generator with a fixed value so that any
# code drawing from it produces the same sequence on every run.
numpy.random.seed(12345)
def parse_value(x):
    """Convert one raw field to a float.

    Returns ``float(x)`` when the conversion succeeds, and ``numpy.nan`` when
    ``float`` rejects the value with a ``ValueError`` (for example a
    non-numeric placeholder or an empty string).
    """
    try:
        number = float(x)
    except ValueError:
        # Unparseable fields are marked as missing rather than aborting.
        number = numpy.nan
    return number
def get_crime_data(lines):
    """Parse CSV text into a list of numeric columns.

    Parameters:
        lines: an iterable of CSV-formatted lines (e.g. an open file).

    Returns:
        A list with one entry per retained column; each entry is a list of
        floats holding that column's value for every row, with
        ``numpy.nan`` wherever ``parse_value`` could not convert the field.
        The first five columns of every row are discarded.
        An input with no non-blank rows yields ``[]``.
    """
    # csv.reader yields [] for blank lines; dropping them keeps the row table
    # rectangular so it can be transposed.
    rows = [row for row in csv.reader(lines) if row]
    if not rows:
        return []

    # Transpose so that each entry of `columns` is one CSV column.
    columns = numpy.asarray(rows).T
    # Skip the five leading columns.
    # NOTE(review): presumably identifier / non-predictive fields - confirm
    # against the data set description.
    columns = columns[5:]

    # Build real lists (not lazy `map` objects) so the result can be handed
    # straight to numpy.asarray on both Python 2 and Python 3.
    return [[parse_value(field) for field in column] for column in columns]
def fill_missing_values(cols):
    """Impute non-finite entries with the median of their own column.

    Parameters:
        cols: a sequence of columns (anything ``numpy.asarray`` accepts),
            one inner sequence per column.

    Returns:
        A 2-D array with one row per input column, in which every entry
        that was not finite (NaN or infinite) has been replaced by the
        median of the finite entries of that same column.
    """
    table = numpy.asarray(cols)
    patched_rows = []
    for series in table:
        is_finite = numpy.isfinite(series)
        # Work on a copy so the caller's data is left untouched.
        patched = numpy.array(series)
        # The median is taken over the finite entries only.
        patched[numpy.logical_not(is_finite)] = numpy.median(series[is_finite])
        patched_rows.append(patched)
    return numpy.vstack(patched_rows)
def main():
    """Fit a cross-validated ridge regression on the crime data and report MSE.

    Expects exactly one command-line argument, the path of the CSV data file;
    otherwise prints a usage line and exits with status 1.

    The file is parsed with ``get_crime_data``, missing values are imputed
    with ``fill_missing_values``, and the last column of the resulting table
    is used as the regression target with all other columns as features.
    The first 20% of the examples train a ``RidgeCV`` model; the remaining
    80% are used to compute the mean squared error, which is printed.
    """
    if len(sys.argv) != 2:
        print('usage: data.csv')
        sys.exit(1)

    print('reading data')
    with open(sys.argv[1]) as f:
        cols = get_crime_data(f)

    print('filling missing values')
    filled_cols = fill_missing_values(cols)
    # fill_missing_values returns one row per column; transpose so that each
    # row of `data` is one example.
    data = filled_cols.T

    print('preparing ridge regression')
    # Imported here so the script's usage check works without scikit-learn.
    # The public import path is used: the private `sklearn.linear_model.ridge`
    # module and `sklearn.cross_validation` no longer exist in scikit-learn.
    from sklearn.linear_model import RidgeCV

    # `normalize` is not passed: False was its default, and the keyword has
    # been removed from current scikit-learn releases.
    # NOTE(review): 21 points over [-10, 11] gives a step of 1.05 in the
    # exponent; confirm whether linspace(-10, 10, 21) was intended.
    learner = RidgeCV(
        alphas=2 ** numpy.linspace(-10, 11, 21),
        fit_intercept=True,
    )

    n_examples = data.shape[0]
    # A single deterministic split: the leading 20% of the rows train the
    # model and the remaining 80% test it. `splits` stays a list of
    # (train_indices, test_indices) pairs so more splits can be added.
    n_train = int(n_examples * 0.2)
    splits = [(numpy.arange(n_train), numpy.arange(n_train, n_examples))]

    for i, (train_indices, test_indices) in enumerate(splits):
        print('split %d' % i)
        train = data[train_indices, :]
        test = data[test_indices, :]
        print('\ttraining examples: %d' % train.shape[0])
        print('\ttesting examples: %d' % test.shape[0])

        # The last column is the target; everything before it is a feature.
        x_train = train[:, :-1]
        y_train = train[:, -1]
        x_test = test[:, :-1]
        y_test = test[:, -1]

        print('\tfitting ridge model')
        fit = learner.fit(x_train, y_train)
        # Current scikit-learn exposes the selected penalty as `alpha_`;
        # very old releases called it `best_alpha`.
        best_alpha = fit.alpha_ if hasattr(fit, 'alpha_') else fit.best_alpha
        print('\t\tbest_alpha = %e' % best_alpha)

        print('\tmaking predictions')
        predictions = fit.predict(x_test)
        mse = numpy.mean((predictions - y_test) ** 2)
        print('\ttest mse = %e' % mse)
# Run the experiment only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment