Skip to content

Instantly share code, notes, and snippets.

@tdebatty
Last active August 29, 2015 13:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tdebatty/8964778 to your computer and use it in GitHub Desktop.
Save tdebatty/8964778 to your computer and use it in GitHub Desktop.
NN-Descent - A PHP implementation of NN-Descent algorithm : "Efficient k-nearest neighbor graph construction for generic similarity measures"
<?php
/* A quick and dirty PHP implemenation of NNDescent algorithm, as proposed in the paper
* Efficient k-nearest neighbor graph construction for generic similarity measures
* http://dl.acm.org/citation.cfm?id=1963487
*
*/
/**
* The algorithm can be used with any kind of item, as long as some
* similarity metric is defined
* Rmrk : usually, a similarity (0 .. 1) can be computed from a distance using
* S = 1 / (1 + d)
**/
class Item implements MetricSpaceElement
{
public $value = 0;
public function __construct($value) {
$this->value = $value;
}
public function Similarity(MetricSpaceElement $other) {
return 1 / (1 + abs($this->value - $other->value));
}
}
$n = 1000;
// Create dataset
$V = array();
for ($i = 0; $i <$n; $i++) {
$V[] = new Item(rand());
}
//var_dump($V);
$nnd = new NNDescent();
$nnd->K = 3;
$nnd->V = $V;
$nnd->Run();
//var_dump($nnd->B);
$nnd->PrintStats();
class NNDescent
{
public $K = 3;
public $V = array();
public $B = array();
public $computed_similarities = 0;
public $iterations = 0;
public $running_time = 0;
public function Run() {
$start = time();
foreach ($this->V as $i => $v) {
$this->B[$i] = $this->sample($this->V, $this->K);
}
while (1) {
$this->iterations++;
$R = $this->reverse($this->B);
$B2 = array();
foreach ($this->V as $i => $v) {
$B2[$i] = array_merge($this->B[$i], $R[$i]);
$B2[$i] = $this->kick_doublons($B2[$i]);
}
$c = 0;
foreach ($this->V as $i => $v) {
foreach ($B2[$i] as $j => $u1) {
foreach ($B2[$j] as $k => $u2) {
if ($v !== $u2->item) {
$l = $v->Similarity($u2->item);
$this->computed_similarities++;
$c += $this->updateNN($this->B[$i], $u2->item, $l);
}
}
}
}
if ($c == 0) {
break;
}
}
$this->running_time = time() - $start;
}
public function PrintStats() {
echo "Completion took {$this->iterations} iterations\n";
echo "Computed similarities: {$this->computed_similarities} \n";
echo "Brute force would be: " . (count($this->V) * (count($this->V) - 1) / 2). "\n";
echo "Running time: {$this->running_time} sec\n";
echo "Peak memory usage: " . round(memory_get_peak_usage() / 1024 / 1024) . "MB\n";
}
/**
* Sample $n elements from array $S
**/
protected function sample($S, $n) {
$return = array();
while(count($return) < $n) {
$i = $S[rand(0, count($S)-1)];
$already_in_return = false;
foreach ($return as $OtherGraphElement) {
if ($i === $OtherGraphElement->item) {
$already_in_return = true;
break;
}
}
if (!$already_in_return) {
$return[] = new KNNGraphElement($i);
}
}
return $return;
}
/**
* Reverse NN array
* $R[v] is the list of elements (u) for which v is a neighbor
* (v is in $B[u])
* */
function reverse($B) {
$R = array();
// For each Item $v from dataset
foreach ($this->V as $i => $v) {
$R[$i] = array();
// For each other Item $v2
foreach ($this->V as $j => $v2) {
// If $v is a NN of $v2
foreach ($B[$j] as $NeighborOfV2) {
if ($v === $NeighborOfV2->item) {
$R[$i][] = new KNNGraphElement($v2);
}
}
}
}
return $R;
}
// $l is a KNNGraphElement[]
function kick_doublons($L) {
$return = array();
foreach ($L as $e) {
$already_in_return = false;
foreach ($return as $other) {
if ($other->item === $e->item) {
$already_in_return = true;
}
}
if (!$already_in_return) {
$return[] = $e;
}
}
return $return;
}
/**
* Update KNN list $K
* Try to insert item $u, which has a similarity $l
* Return 1 if $u was inserted
* 0 otherwise
* */
function updateNN(&$K, Item $u, $l) {
// Find the element of current NN list that has the smallest similarity
// $K is an array of KNNGraphElement
$smallest_similarity_i = 0;
$smallest_similarity = 1E308;
foreach ($K as $i => $GraphElement) {
if ($GraphElement->item === $u) {
// This item is already in neighborhoods
return 0;
}
if ($GraphElement->similarity < $smallest_similarity) {
$smallest_similarity = $GraphElement->similarity;
$smallest_similarity_i = $i;
}
}
if ($smallest_similarity < $l) {
// Insert $u
$K[$smallest_similarity_i] = new KNNGraphElement($u, $l);
return 1;
}
return 0;
}
}
interface MetricSpaceElement
{
public function Similarity(MetricSpaceElement $other);
}
class KNNGraphElement
{
public $item;
public $similarity;
public function __construct(Item $item, $similarity = 0) {
$this->item = $item;
$this->similarity = $similarity;
}
}
<?php
/* A quick and dirty PHP implemenation of NNDescentFull algorithm, as proposed in the paper
* Efficient k-nearest neighbor graph construction for generic similarity measures
* http://dl.acm.org/citation.cfm?id=1963487
*
*/
class Item implements MetricSpaceElement
{
public $value = 0;
public function __construct($value) {
$this->value = $value;
}
public function Similarity(MetricSpaceElement $other) {
return 1 / (1 + abs($this->value - $other->value));
}
}
$n = 800;
// Create dataset
$V = array();
for ($i = 0; $i <$n; $i++) {
$V[] = new Item(rand());
}
//var_dump($V);
$nnd = new NNDescentFull();
$nnd->K = 3;
$nnd->rho = 0.5;
$nnd->delta = 0.001;
$nnd->V = $V;
$nnd->Run();
//var_dump($nnd->B);
$nnd->PrintStats();
class NNDescentFull
{
public $V;
public $K = 3;
public $rho = 0.5;
public $delta = 0.001;
/**
* Contains KNNGraphElements
* */
public $B;
public $iterations = 0;
public $computed_similarities = 0;
public $running_time = 0;
public function __construct() {
$this->B = new SplObjectStorage();
}
public function Run() {
$start = time();
foreach ($this->V as $v) {
$this->B[$v] = $this->sampleGraphElems($this->V, $this->K);
}
$old = new SplObjectStorage();
$new = new SplObjectStorage();
while (true) {
$this->iterations++;
foreach ($this->V as $v) {
$old[$v] = $this->pick_falses($this->B[$v]);
$new[$v] = $this->pick_trues_and_mark_false($this->B[$v], $this->rho);
}
$old2 = $this->reverse($old);
$new2 = $this->reverse($new);
$c = 0;
foreach ($this->V as $v) {
$old[$v] = $this->union($old[$v], $this->sample($old2[$v], $this->rho * $this->K));
$new[$v] = $this->union($new[$v], $this->sample($new2[$v], $this->rho * $this->K));
foreach ($new[$v] as $u1) {
// Process only u2 if u1 < u2
$processing = false;
foreach ($new[$v] as $u2) {
if ($u2 === $u1) {
$processing = true;
continue;
}
if ($processing) {
$l = $u1->Similarity($u2);
$this->computed_similarities++;
$this->B[$u1] = $this->updateNN($this->B[$u1], $u2, $l, $c);
$this->B[$u2] = $this->updateNN($this->B[$u2], $u1, $l, $c);
}
}
foreach ($old[$v] as $u2) {
if ($u2 === $u1) {
continue;
}
$l = $u1->Similarity($u2);
$this->computed_similarities++;
$this->B[$u1] = $this->updateNN($this->B[$u1], $u2, $l, $c);
$this->B[$u2] = $this->updateNN($this->B[$u2], $u1, $l, $c);
}
}
}
if ($c < $this->delta * count($this->V) * $this->K) {
break;
}
}
$this->running_time = time() - $start;
}
public function PrintStats() {
echo "Completion took {$this->iterations} iterations\n";
echo "Computed similarities: {$this->computed_similarities} \n";
echo "Brute force would be: " . (count($this->V) * (count($this->V) - 1) / 2). "\n";
echo "Running time: {$this->running_time} sec\n";
}
/**
* $a is an array of KNNGraphElement
* return array of items
* */
protected function pick_falses($A) {
$return = array();
foreach ($A as $element) {
if (!$element->new) {
$return[] = $element->item;
}
}
return $return;
}
/**
* pick new items (marked as true) with a probability of rho
* mark them as false
* $A is an array of KNNGraphElement
* return array of items
* */
protected function pick_trues_and_mark_false($A, $rho) {
$return = array();
foreach ($A as $element) {
if ($element->new
AND (rand() / getrandmax() <= $rho) ) {
$element->new = false;
$return[] = $element->item;
}
}
return $return;
}
/**
* $S is an array
*
* */
protected function sample($S, $n) {
if (count($S) <= $n) {
return $S;
}
$return = array();
// should use array_rand() !!!
while(count($return) < $n) {
$i = $S[rand(0, count($S)-1)];
$already_in_return = false;
foreach ($return as $OtherGraphElement) {
if ($i === $OtherGraphElement) {
$already_in_return = true;
break;
}
}
if (!$already_in_return) {
$return[] = $i;
}
}
return $return;
}
protected function sampleGraphElems($S, $n) {
$return = array();
// should use array_rand() !!!
while(count($return) < $n) {
$i = $S[rand(0, count($S)-1)];
$already_in_return = false;
foreach ($return as $OtherGraphElement) {
if ($i === $OtherGraphElement->item) {
$already_in_return = true;
break;
}
}
if (!$already_in_return) {
$return[] = new KNNGraphElement($i);
}
}
return $return;
}
/**
* Reverse NN array
* $R[v] is the list of elements (u) for which v is a neighbor
* (v is in $B[u])
* */
protected function reverse($B) {
$R = new SplObjectStorage();
// For each Item $v from dataset
foreach ($this->V as $v) {
$r = array();
// For each other Item $v2
foreach ($this->V as $v2) {
// If $v is a NN of $v2
foreach ($B[$v2] as $NeighborOfV2) {
if ($v === $NeighborOfV2) {
$r[] = $v2;
}
}
}
$R[$v] = $r;
}
return $R;
}
protected function union($a, $b) {
foreach ($b as $new) {
foreach($a as $other) {
if ($new === $other) {
continue;
}
$a[] = $new;
}
}
return $a;
}
/**
* Update KNN list $K
* $K is an array of NNGraphElement
* Try to insert item $u, which has a similarity $l
* Returns the new array of NNGraphElement
* If a modification has occured, $c is incremented by one
* */
protected function updateNN($K, Item $u, $l, &$c) {
// Find the element of current NN list that has the smallest similarity
// $K is an array of KNNGraphElement
$smallest_similarity_i = 0;
$smallest_similarity = 1E308;
foreach ($K as $i => $GraphElement) {
if ($GraphElement->item === $u) {
// This item is already in neighborhoods
return $K;
}
if ($GraphElement->similarity < $smallest_similarity) {
$smallest_similarity = $GraphElement->similarity;
$smallest_similarity_i = $i;
}
}
if ($smallest_similarity < $l) {
// Insert $u
$K[$smallest_similarity_i] = new KNNGraphElement($u, $l);
$c++;
return $K;
}
return $K;
}
}
interface MetricSpaceElement
{
public function Similarity(MetricSpaceElement $other);
}
class KNNGraphElement
{
public $item;
public $similarity;
public $new = true;
public function __construct(Item $item, $similarity = 0) {
$this->item = $item;
$this->similarity = $similarity;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment