Skip to content

Instantly share code, notes, and snippets.

@bdelespierre
Created December 17, 2014 14:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdelespierre/65bd2ff451d5f6ec4554 to your computer and use it in GitHub Desktop.
Save bdelespierre/65bd2ff451d5f6ec4554 to your computer and use it in GitHub Desktop.
Keywords hamming distance
<?php
// Demo data: each campaign is described by the list of keywords it targets.
$campaigns = [
    'foo' => ['keywords' => ['a', 'b', 'c']],
    'bar' => ['keywords' => ['b', 'c', 'e']],
    'baz' => ['keywords' => ['d', 'f', 'b']],
];

// Collect every keyword used by any campaign.
// array_values() drops the campaign names: on PHP 8+ call_user_func_array()
// treats string keys as named arguments, which array_merge() rejects.
$keywordLists = array_values(array_map(function ($c) {
    return $c['keywords'];
}, $campaigns));
// The leading empty list keeps array_merge() valid even with zero campaigns.
$allKeywords = array_unique(call_user_func_array('array_merge', array_merge([[]], $keywordLists)));

// sort() (unlike asort()) reindexes from 0, so after the flip every keyword
// owns a dense bit position: keyword => 0..n-1.
sort($allKeywords);
$map = array_flip($allKeywords);

// One bit per keyword must fit in a native integer; the sign bit is left
// unused so every bitfield stays non-negative. Shifting past the integer
// width would silently drop keywords instead of failing.
if (count($allKeywords) > PHP_INT_SIZE * 8 - 1) {
    throw new OverflowException(sprintf(
        'Too many distinct keywords (%d) to fit in a %d-bit bitfield',
        count($allKeywords),
        PHP_INT_SIZE * 8 - 1
    ));
}

// keywords bitfield calculation: set one bit per keyword the campaign targets
foreach ($campaigns as &$campaign) {
    $bits = 0;
    foreach ($campaign['keywords'] as $k) {
        $bits |= 1 << $map[$k];
    }
    $campaign['bits'] = $bits;
}
// Break the reference: otherwise the later by-value loop that reuses
// $campaign would keep overwriting the last element of $campaigns.
unset($campaign);

$user = [
    'keywords' => ['c', 'd', 'e'],
    'bits' => 0,
];
foreach ($user['keywords'] as $k) {
    // Keywords that no campaign uses have no bit position; skip them.
    if (!isset($map[$k])) {
        continue;
    }
    $user['bits'] |= 1 << $map[$k];
}

// calculate distance for each campaign: the Hamming distance is the number
// of bit positions where the campaign and user bitfields differ.
foreach ($campaigns as $name => $campaign) {
    // '^' is the bitwise XOR operator; the 'xor' keyword is logical, not bitwise.
    $distance = number_of_non_zero($campaign['bits'] ^ $user['bits']);
    echo "Name: $name, distance: $distance\n";
}
/**
 * Count the bits set to 1 in an integer (population count).
 *
 * Each pass of the loop clears the lowest set bit with `$num & ($num - 1)`,
 * so the loop runs once per set bit.
 *
 * @param int $num Bitfield to inspect. Negative values are counted by their
 *                 two's-complement representation (the sign bit counts as 1).
 * @return int Number of 1 bits in $num.
 */
function number_of_non_zero($num)
{
    $num = (int) $num;
    $res = 0;

    if ($num < 0) {
        // Count the sign bit up front and clear it. Without this the loop
        // below ends up at PHP_INT_MIN, where `$num - 1` overflows to a float
        // that converts back to PHP_INT_MIN, and the loop never terminates.
        $res = 1;
        $num &= PHP_INT_MAX;
    }

    while ($num !== 0) {
        $num &= $num - 1; // clear the lowest set bit
        $res += 1;
    }

    return $res;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment