"ops" feature generation
clear ; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)];
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  % x values are the training set sizes used in inferLambda: 1, 51, ..., 551
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
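% Illustrative check (not executed as part of the script): for two input
% columns and d = 2, mapPoly returns the 2 original columns plus the
% 2*2 = 4 pairwise products.
%disp(mapPoly([1 2; 3 4], 2)) % -> [1 2 1 2 2 4; 3 4 9 12 12 16]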
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x1,x2), op1(x2,x1), op1(x2,x2), ...
% op2(x1,x1), op2(x1,x2), op2(x2,x1), op2(x2,x2)]
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
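% Illustrative check (not executed as part of the script): at d = 2 every op
% is applied to every (generated column, original column) pair, so 2 columns
% and 2 ops give 2 + 2*2*2 = 10 columns. Note that with the ops used below
% ("a ./ b" in particular), mapOps([v, r], 2, ...) contains the column
% v ./ r -- exactly the Ohm's-law target -- which is why the "ops" model
% ends up with a much lower test error than the raw and poly models.
%disp(mapOps([2 4; 6 3], 2, ["a .* b"; "a ./ b"]))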
% initial data ---
m = 1000; % dataset size
rand("seed", 42)
randn("seed", 314); % to rule-out uncertainty between runs
v = rand(m, 1); % voltage (V)
r = rand(m, 1); % resistance (R)
y = v ./ r; % current (I = V/R)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement noise
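% split the 1000 samples: 200 cv, 200 test, 600 training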
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
% linear regression on raw features ---
X_raw = [v, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 8; % inferred
X_poly = mapPoly([v, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:9, ...
% @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 2; % inferred
X_ops = mapOps([v, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([v, r], y_training, y_cv, 1:5, ...
% @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% 17.69321 raw
% 10.13354 poly (1.7460 times smaller than raw)
% 0.26357 ops (38.447 times smaller than poly)
bar([error_raw; error_poly; error_ops])
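
% script 2: Newton's law of gravitation (F = G * m1 * m2 / r^2) ---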
clear ; close all; clc
% helpers used later ---
function [result] = measureError(X, y, theta)
  result = linearRegCostFunction(X, y, theta, 0);
endfunction
% try different d and return errors on the cv set
function [error_val] = inferD(X_to_map, y_training, y_cv, ds, mapper)
  error_val = zeros(columns(ds), 1);
  for i = 1:columns(ds)
    d = ds(i);
    X = mapper(X_to_map, d);
    X_cv = [ones(200, 1), X(1:200, :)];
    X_test = [ones(200, 1), X(201:400, :)];
    X_training = [ones(600, 1), X(401:end, :)];
    theta = trainLinearReg(X_training, y_training, 0); % lambda = 0: good idea?
    error_val(i) = measureError(X_cv, y_cv, theta);
  endfor
endfunction
% try different lambdas and return errors on the training and cv sets
function [error_train, error_val] = inferLambda(X_training, y_training, X_cv, y_cv, lambdas)
  error_train = zeros(rows(lambdas), 12);
  error_val = zeros(rows(lambdas), 12);
  for l = 1:rows(lambdas)
    lambda = lambdas(l);
    for sample = 1:12
      i = 1 + (sample - 1) * 50; % training set sizes: 1, 51, 101, ..., 551
      X_training_i = X_training(1:i, :);
      y_i = y_training(1:i, :);
      theta = trainLinearReg(X_training_i, y_i, lambda);
      error_train(l, sample) = linearRegCostFunction(X_training_i, y_i, theta, 0);
      error_val(l, sample) = linearRegCostFunction(X_cv, y_cv, theta, 0);
    endfor
  endfor
endfunction
function plotLearningCurve(error_train, error_val)
  % x values are the training set sizes used in inferLambda: 1, 51, ..., 551
  plot([1, 51:50:551], error_train, [1, 51:50:551], error_val);
  text([1, 51:50:551], error_val, num2str(error_val'), "rotation", 90);
  legend('Train', 'Cross Validation');
endfunction
% Poly map features. For features [x1, x2] and d=2 this will produce:
% [x1, x2, x1*x1, x1*x2, x2*x1, x2*x2]
function result = mapPoly(features, d)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      current = [current, prev .* features(:, i)];
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% Map features with binary operations.
% For features [x1, x2], ops [op1, op2] and d=2 this will produce:
% [x1, x2, op1(x1,x1), op1(x1,x2), op1(x2,x1), op1(x2,x2), ...
% op2(x1,x1), op2(x1,x2), op2(x2,x1), op2(x2,x2)]
function result = mapOps(features, d, ops)
  result = features;
  prev = features;
  for _ = 2:d
    current = [];
    for i = 1:columns(features)
      for o = 1:rows(ops)
        current = [current, inline(ops(o, :))(prev, features(:, i))];
      endfor
    endfor
    prev = current;
    result = [result, current];
  endfor
endfunction
% initial data ---
m = 1000; % dataset size
rand("seed", 42);
randn("seed", 314); % to rule-out uncertainty between runs
m1 = rand(m, 1); % first mass (m1)
m2 = rand(m, 1); % second mass (m2)
r = rand(m, 1); % distance (r)
y = 6.674e-11 .* ((m1 .* m2) ./ (r .^ 2)); % force (F = G * (m1 * m2) / r^2)
y = y + (y .* randn(m, 1) ./ 10); % simulate ~10% measurement noise
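% note: with G ~ 6.7e-11 and unit-scale masses/distances the target values are
% tiny, so the errors reported at the end of this script are tiny in absolute
% terms as well; the relative comparison between raw/poly/ops is what matters.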
y_cv = y(1:200);
y_test = y(201:400);
y_training = y(401:end);
% linear regression on raw features ---
X_raw = [m1, m2, r];
X_raw_cv = [ones(200, 1), X_raw(1:200, :)];
X_raw_test = [ones(200, 1), X_raw(201:400, :)];
X_raw_training = [ones(600, 1), X_raw(401:end, :)];
lambda_raw = 0; % high bias case, no need for regularization
theta_raw = trainLinearReg(X_raw_training, y_training, lambda_raw);
error_raw = measureError(X_raw_test, y_test, theta_raw);
% linear regression on polynomial features ---
d_poly = 5; % inferred
X_poly = mapPoly([m1, m2, r], d_poly);
X_poly_cv = [ones(200, 1), X_poly(1:200, :)];
X_poly_test = [ones(200, 1), X_poly(201:400, :)];
X_poly_training = [ones(600, 1), X_poly(401:end, :)];
lambda_poly = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:9, ...
% @(X_to_map, d) mapPoly(X_to_map, d));
%bar(1:9, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_poly_training, y_training, X_poly_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;;
theta_poly = trainLinearReg(X_poly_training, y_training, lambda_poly);
error_poly = measureError(X_poly_test, y_test, theta_poly);
% linear regression on ops-mapped features ---
d_ops = 3; % inferred
X_ops = mapOps([m1, m2, r], d_ops, ["a .* b"; "a ./ b"; "a .^ b"]);
X_ops_cv = [ones(200, 1), X_ops(1:200, :)];
X_ops_test = [ones(200, 1), X_ops(201:400, :)];
X_ops_training = [ones(600, 1), X_ops(401:end, :)];
lambda_ops = 0; % inferred
% this is the code used to infer d and lambda:
%error_val = inferD([m1, m2, r], y_training, y_cv, 1:5, ...
% @(X_to_map, d) mapOps(X_to_map, d, ["a .* b"; "a ./ b"; "a .^ b"]));
%bar(1:5, error_val); break;
%lambdas = [0, (ones(1, 30) .* 10) .^ (-9:20)]';
%[error_train, error_val] = inferLambda(X_ops_training, y_training, X_ops_cv, y_cv, lambdas);
%plotLearningCurve(error_train(1, :), error_val(1, :)); break;;
theta_ops = trainLinearReg(X_ops_training, y_training, lambda_ops);
error_ops = measureError(X_ops_test, y_test, theta_ops);
% 4.3083e-016 raw
% 4.1347e-016 poly
% 1.6780e-017 ops (24.641 times smaller than poly)
bar([error_raw; error_poly; error_ops])