-
-
Save derickr/4642394 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
error_reporting(E_ALL);

// Number of features per sample. The model uses NUM_FEATURES + 1 weights,
// because the first weight is a free term with no matching feature.
define('NUM_FEATURES', 3);

// My dataset describes cities around the world where I might consider living.
// Each sample (city) consists of 3 features:
//  * Feature 1: average low winter temperature in the city
//  * Feature 2: city population, in millions
//  * Feature 3: does the city have an airport I can fly to from USA directly?
//
// The labels (categories) are 1 (yes) and 0 (no).
// All the data is floating-point.
$training = [
    [-11., 2.6,  1.],
    [  8., 0.78, 1.],
    [ 15., 4.2,  0.],
    [-16., 0.18, 0.],
    [  3., 1.1,  0.],
    [  7., 1.4,  1.],
    [ -3., 1.44, 1.],
    [ -7., 0.52, 0.],
    [ 30., 0.82, 1.],
    [ 20., 1.32, 0.],
];

// One label per training sample, in the same order as $training.
$labels = [
    0.,
    1.,
    0.,
    0.,
    1.,
    1.,
    1.,
    0.,
    0.,
    1., // was the integer 1; kept as a float like every other label
];

$NUM_SAMPLES = count($training);
// Initialize the weights array to random starting values between 0 and 5.
// There are always 1+NUM_FEATURES weights, because the first weight
// does not correspond to a feature value, since:
//   weights * features = weight0 + weight1 * feature1 + weight2 * feature2 + ...
$weights = [];
for ($j = 0; $j < NUM_FEATURES + 1; $j++) {
    $weights[$j] = mt_rand() / mt_getrandmax() * 5.0;
}

$learning_rate = 0.05;
$steps = 20000; // number of steps to take for gradient descent

for ($n = 0; $n < $steps; $n++) {
    // The weights stay fixed for the whole step, so the prediction error of
    // each sample is the same for every weight index: compute it once here
    // instead of once per weight.
    $errors = [];
    for ($i = 0; $i < $NUM_SAMPLES; $i++) {
        $errors[$i] = hypothesis($training[$i], $weights) - $labels[$i];
    }

    // For each weight, perform the gradient descent step and save the result
    // to $temp, so that all weights are updated simultaneously afterwards.
    $temp = [];
    for ($j = 0; $j < NUM_FEATURES + 1; $j++) {
        $sum_m = 0.0;
        for ($i = 0; $i < $NUM_SAMPLES; $i++) {
            // The first weight has a dummy 1 "feature" value
            $sum_m += $errors[$i] * ($j === 0 ? 1.0 : $training[$i][$j - 1]);
        }
        $temp[$j] = $weights[$j] - $learning_rate * $sum_m / $NUM_SAMPLES;
    }
    $weights = $temp;
}
// Report how many steps ran ($n is the loop counter left by the training
// loop) and the weights that were learned.
echo "Executed $n steps\n";
echo "Weights: ", vector_to_str($weights), "\n";

// Validate the results: run the model on the training samples themselves
// and count how many predictions agree with the known labels.
print "\nValidating training\n";
$correct = 0;
for ($i = 0; $i < $NUM_SAMPLES; $i++) {
    $predict = predict($training[$i], $weights);
    printf("Input: %s, actual: %d, predict: %d\n", vector_to_str($training[$i]), $labels[$i], $predict);
    // predict() returns an int while the labels are floats, so this
    // comparison is intentionally loose (==), not strict (===).
    if ($predict == $labels[$i]) {
        $correct++;
    }
}
printf("Correctness = %.0f%%\n", $correct / $NUM_SAMPLES * 100.0);
// Try some predictions on samples that were not part of the training set.
print "\nTesting the model\n";
$test = [
    [ -1., 1.1, 1.],
    [ 23., 0.9, 0.],
    [  4., 1.9, 0.],
    [-14., 1.1, 1.],
];
foreach ($test as $sample) {
    $predict = predict($sample, $weights);
    printf("Input: %s, predict: %d\n", vector_to_str($sample), $predict);
}
/**
 * Logistic-regression hypothesis: sigmoid of the weighted sum of the features.
 *
 * @param array $x       Feature values of one sample.
 * @param array $weights Model weights; $weights[0] is the free weight and
 *                       $weights[$i + 1] multiplies $x[$i].
 *
 * @return float Sigmoid of the score, a value between 0.0 and 1.0.
 */
function hypothesis($x, $weights)
{
    $score = $weights[0]; // free weight
    $k = count($x);

    // Calculate dot product
    for ($i = 0; $i < $k; $i++) {
        $score += $weights[$i + 1] * $x[$i];
    }

    // Run through the sigmoid (logistic) function
    return 1.0 / (1.0 + exp(-$score));
}
/**
 * Classify one sample by thresholding the hypothesis output at 0.5.
 *
 * @param array $input   Feature values of one sample.
 * @param array $weights Model weights, passed through to hypothesis().
 *
 * @return int 1 when hypothesis() returns 0.5 or more, otherwise 0.
 */
function predict($input, $weights)
{
    // Threshold on 0.5
    return hypothesis($input, $weights) >= 0.50 ? 1 : 0;
}
/**
 * Format a vector for display as "[a, b, c]".
 *
 * @param array $x Values to print; each is converted to a string by implode().
 *
 * @return string The values joined by ", " and wrapped in square brackets.
 */
function vector_to_str($x)
{
    return '[' . implode(', ', $x) . ']';
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment