Skip to content

Instantly share code, notes, and snippets.

@xaptronic
Forked from andreiz/classifier.php
Last active December 14, 2015 09:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xaptronic/5066263 to your computer and use it in GitHub Desktop.
<?php
error_reporting(E_ALL);

define('NUM_FEATURES', 3);

// Dataset: cities around the world under consideration for living.
// Each sample (city) has 3 features:
//   * Feature 1: average low winter temperature in the city
//   * Feature 2: city population, in millions
//   * Feature 3: does the city have an airport reachable directly from USA?
// Labels (categories) are 1 (yes) and 0 (no); all data is floating-point.
$training = array(
    array(-11.,  2.6,  1.),
    array(  8.,  0.78, 1.),
    array( 15.,  4.2,  0.),
    array(-16.,  0.18, 0.),
    array(  3.,  1.1,  0.),
    array(  7.,  1.4,  1.),
    array( -3.,  1.44, 1.),
    array( -7.,  0.52, 0.),
    array( 30.,  0.82, 1.),
    array( 20.,  1.32, 0.),
);
$labels = array(0., 1., 0., 0., 1., 1., 1., 0., 0., 1);

$NUM_SAMPLES = count($training);

// Random starting weights. There are always 1+NUM_FEATURES of them:
// the extra leading weight is the bias term, since
//   weights * features = weight0 + weight1*feature1 + weight2*feature2 + ...
$weights = array();
for ($w = 0; $w < NUM_FEATURES + 1; $w++) {
    $weights[$w] = mt_rand() / mt_getrandmax() * 5.0;
}

$learning_rate = 0.05;
$steps = 20000;     // number of gradient descent iterations
$temp = array();    // staging area so all weights update simultaneously

for ($n = 0; $n < $steps; $n++) {
    // Compute the descent step for every weight before committing any of
    // them, so each update is based on the same (previous) weight vector.
    for ($j = 0; $j < NUM_FEATURES + 1; $j++) {
        $sum_m = 0.0;
        for ($i = 0; $i < $NUM_SAMPLES; $i++) {
            $h = hypothesis($training[$i], $weights);
            // The bias weight ($j == 0) pairs with a constant 1 "feature".
            $feature = ($j == 0) ? 1.0 : $training[$i][$j - 1];
            $sum_m += ($h - $labels[$i]) * $feature;
        }
        $temp[$j] = $weights[$j] - $learning_rate * $sum_m / $NUM_SAMPLES;
    }
    $weights = $temp;
}

echo "Executed $n steps\n";
echo "Weights: ", vector_to_str($weights), "\n";

// Check how well the model reproduces its own training labels.
print "\nValidating training\n";
$correct = 0;
for ($i = 0; $i < $NUM_SAMPLES; $i++) {
    $predict = predict($training[$i], $weights);
    printf("Input: %-16s actual: %d, predict: %d", vector_to_str($training[$i]), $labels[$i], $predict);
    if ($labels[$i] != $predict) {
        print " - miss";
    }
    print "\n";
    if ($predict == $labels[$i]) {
        $correct++;
    }
}
printf("Correctness = %.0f%%\n", $correct / $NUM_SAMPLES * 100.0);

// Try the model on samples it has never seen.
print "\nTesting the model\n";
$test = array(
    array( -1., 1.1, 1.),
    array( 23., 0.9, 0.),
    array(  4., 1.9, 0.),
    array(-14., 1.1, 1.),
);
foreach ($test as $sample) {
    printf("Input: %-16s predict: %d\n", vector_to_str($sample), predict($sample, $weights));
}
function hypothesis($x, $weights)
{
    // Dot product of the feature vector with the weights, where
    // $weights[0] is the bias term paired with an implicit 1 feature.
    $score = $weights[0];
    foreach ($x as $idx => $value) {
        $score += $weights[$idx + 1] * $value;
    }
    // Squash the score through the sigmoid (logistic) function into (0, 1).
    return 1.0 / (1.0 + exp(-$score));
}
function predict($input, $weights)
{
    // Classify by thresholding the sigmoid output at 0.5:
    // >= 0.5 maps to class 1, anything below to class 0.
    return hypothesis($input, $weights) >= 0.50 ? 1 : 0;
}
function vector_to_str($x)
{
    // Render a numeric vector as "[a, b, c]" for display.
    return sprintf('[%s]', implode(", ", $x));
}
?>
#!/usr/bin/perl
use strict;
use warnings;

use constant NUM_FEATURES  => 3;
use constant STEPS         => 20000;
#use constant STEPS => 4;
use constant LEARNING_RATE => 0.05;

use Data::Dumper;

# prototypes
sub hypothesis($$);
sub predict($$);

# Each sample (city): average low winter temperature, city population in
# millions, and whether the city has an airport I can fly to from the USA.
my $training = [
    [ -11., 2.6,  1. ],
    [   8., 0.78, 1. ],
    [  15., 4.2,  0. ],
    [ -16., 0.18, 0. ],
    [   3., 1.1,  0. ],
    [   7., 1.4,  1. ],
    [  -3., 1.44, 1. ],
    [  -7., 0.52, 0. ],
    [  30., 0.82, 1. ],
    [  20., 1.32, 0. ],
];

# The label (category: 1 = yes, 0 = no) for each training sample above.
my $labels = [
    0., 1., 0., 0., 1., 1., 1., 0., 0., 1.,
];

# ten samples
my $NUM_SAMPLES = @$training;

# Random starting weights. There are NUM_FEATURES+1 of them (not one per
# feature): the extra leading weight is the bias term, paired with an
# implicit constant-1 feature inside hypothesis().
my $weights = [];
until (@$weights >= NUM_FEATURES + 1) {
    my $rand = rand;
    push @$weights, $rand * 6;
}

my $new_weights = [];

# Gradient descent: on every step, compute the update for each weight from
# the whole training set, then commit all updates simultaneously so every
# update is based on the same (previous) weight vector.
my $n;
for ($n = 0; $n < STEPS; $n++) {
    for (my $j = 0; $j < NUM_FEATURES + 1; $j++) {
        my $sum_m = 0.0;    # accumulated gradient (slope) for weight $j
        for (my $i = 0; $i < $NUM_SAMPLES; $i++) {
            # Logistic-regression prediction for this sample.
            my $h = hypothesis($training->[$i], $weights);
            # Partial derivative of the cost: prediction error times the
            # feature value ($j == 0 is the bias, paired with constant 1).
            $sum_m += ($h - $labels->[$i]) * ($j == 0 ? 1.0 : $training->[$i]->[$j-1]);
        }
        $new_weights->[$j] = $weights->[$j] - LEARNING_RATE * $sum_m / $NUM_SAMPLES;
    }
    @$weights = @$new_weights;
}

print "Executed $n steps\n";
print "Weights: ", join(" / ", @$weights). "\n";

# Re-run the model on its own training data to gauge the fit.
print "\nValidating training\n";
my $correct = 0;
for (my $i = 0; $i < $NUM_SAMPLES; $i++) {
    my $prediction = predict($training->[$i], $weights);
    printf("Input %-16s actual: %d, predict %d", join(",", @{$training->[$i]}), $labels->[$i], $prediction);
    if ($labels->[$i] != $prediction) {
        print " - miss";
    } else {
        $correct++;
    }
    print "\n";
}
# Report the hit rate; previously $correct was accumulated but never
# printed. This matches the PHP version of this script.
printf("Correctness = %.0f%%\n", $correct / $NUM_SAMPLES * 100.0);
# Logistic-regression hypothesis: sigmoid of the weighted feature sum.
# Takes a feature arrayref and a weights arrayref; $w->[0] is the bias
# ("free") weight, paired with an implicit constant-1 feature.
sub hypothesis($$) {
    my ($x, $w) = @_;

    # Bias term plus the dot product of the remaining weights with the
    # feature vector.
    my $score = $w->[0];
    $score += $w->[$_ + 1] * $x->[$_] for 0 .. $#$x;

    # The sigmoid (logistic) function maps any real score into (0, 1).
    return 1.0 / (1.0 + exp(-$score));
}
# Classify a feature vector with the given weights: threshold the
# sigmoid output of hypothesis() at 0.5, yielding a 0/1 class label.
sub predict($$) {
    my ($x, $w) = @_;
    return hypothesis($x, $w) >= 0.50 ? 1 : 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment