Skip to content

Instantly share code, notes, and snippets.

@lalinsky
Created April 18, 2012 20:34
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save lalinsky/2416390 to your computer and use it in GitHub Desktop.
Save lalinsky/2416390 to your computer and use it in GitHub Desktop.
Simple audio fingerprint matching in PHP
<?php
include "utils.php";
$fp = parse_fp($_GET['fp']);
$dbh = new PDO('mysql:host=localhost;dbname=fingerprint', 'fingerprint');
$dbh->beginTransaction();
$sth = $dbh->prepare("INSERT INTO fp (length) VALUES (?)");
$sth->execute(array(count($fp)));
$fp_id = $dbh->lastInsertId();
$sth = $dbh->prepare("
INSERT INTO fp_item (fp_id, position, value, query_value)
VALUES (?, ?, ?, ?)
");
$i = 0;
foreach ($fp as $value) {
$sth->execute(array($fp_id, $i, $value, strip_fp_index_bits($value)));
$i += 1;
}
$dbh->commit();
?>
-- MySQL dump 10.13 Distrib 5.1.58, for debian-linux-gnu (x86_64)
--
-- Host: localhost Database: fingerprint
-- ------------------------------------------------------
-- Server version 5.1.58-1ubuntu1
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
--
-- Table structure for table `fp`
--
DROP TABLE IF EXISTS `fp`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `fp` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`length` int(11) NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
--
-- Table structure for table `fp_item`
--
DROP TABLE IF EXISTS `fp_item`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `fp_item` (
`fp_id` int(11) NOT NULL,
`position` int(11) NOT NULL,
`value` int(11) NOT NULL,
`query_value` int(11) NOT NULL,
PRIMARY KEY (`fp_id`,`position`),
KEY `fp_item_idx_query` (`query_value`),
CONSTRAINT `fp_item_fk_fp_id` FOREIGN KEY (`fp_id`) REFERENCES `fp` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
-- Dump completed on 2012-04-18 20:28:21
<?php
include "utils.php";
$fp = parse_fp($_GET['fp']);
$dbh = new PDO('mysql:host=localhost;dbname=fingerprint', 'fingerprint');
$query = prepare_fp_query($fp);
$results = $dbh->query("
SELECT fp_id, count(*) AS score
FROM fp_item
WHERE query_value IN (" . implode(",", $query) . ")
GROUP BY fp_id
ORDER BY count(*) DESC
");
echo "Possible candidates:<br /><ol>";
$candidate_ids = array();
foreach ($results as $result) {
$candidate_ids[] = $result["fp_id"];
echo "<li> ID " . $result["fp_id"] . " (" . $result["score"] . ")</li>";
}
echo "</ol>";
$items = $dbh->query("
SELECT fp_id, value
FROM fp_item
WHERE fp_id IN (" . implode(",", $candidate_ids) . ")
ORDER BY fp_id, position
");
$candidates = array();
foreach ($items as $item) {
$candidates[$item['fp_id']][] = $item['value'];
}
echo "Matches:<br /><ol>";
foreach ($candidate_ids as $id) {
list($score, $offset) = compare_fp($candidates[$id], $fp);
echo "<li> ID $id with score $score, starting at " . format_time($offset) . "</li>";
if ($score < 0.5) {
break;
}
}
echo "</ol>";
?>
<?php
$popcnt_table_8bit = array(
0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
);
// count the number of set bits (1s) in the given number
function popcount($x) {
global $popcnt_table_8bit;
return
$popcnt_table_8bit[($x >> 0) & 255] +
$popcnt_table_8bit[($x >> 8) & 255] +
$popcnt_table_8bit[($x >> 16) & 255] +
$popcnt_table_8bit[($x >> 24) & 255];
}
// convert a fingerprint string into an array of numbers
function parse_fp($string) {
return explode(',', $_GET['fp']);
}
// take the lowest 20 bits of the given number
function strip_fp_index_bits($x) {
return $x & ((1 << 20) - 1);
}
function prepare_fp_query($fp) {
$query = array();
foreach ($fp as $x) {
$query[] = strip_fp_index_bits($x);
}
return $query;
}
function bit_error_rate($fp1, $fp2, $offset) {
if ($offset > 0) {
$fp1 = array_slice($fp1, $offset);
}
elseif ($offset < 0) {
$fp2 = array_slice($fp2, -$offset);
}
$max_i = min(count($fp1), count($fp2));
$errors = 0;
$count = 0;
for ($i = 0; $i < $max_i; $i++) {
$errors += popcount($fp1[$i] ^ $fp2[$i]); // count the set bits in the XOR of the two fingerprints
$count += 1;
}
return max(0.0, 1.0 - 2.0 * $errors / (32.0 * $count));
}
function invert_fp($fp) {
$map = array();
foreach ($fp as $i => $x) {
$map[$x][] = $i;
}
return $map;
}
function compare_fp($fp1, $fp2) {
// reduce the fingerprint items to 20 bits
$fp1_s = prepare_fp_query($fp1);
$fp2_s = prepare_fp_query($fp2);
// create small inverted indexes of the fingerprints
$map1 = invert_fp($fp1_s);
$map2 = invert_fp($fp2_s);
// find items that are present in both fingerprints
$common = array_intersect_key($map1, $map2);
$common = array_keys($common);
// find all matching offsets
$offsets = array();
foreach ($common as $a) {
foreach ($map1[$a] as $i) {
foreach ($map2[$a] as $j) {
$offset = $i - $j;
if (!isset($offsets[$offset])) {
$offsets[$offset] = 0;
}
$offsets[$offset] += 1;
}
}
}
arsort($offsets);
// calculate the actual similarity for the top 20% matching offsets
$matches = array();
$max_count = null;
foreach ($offsets as $offset => $count) {
if (is_null($max_count)) {
$max_count = $count;
}
if ($count < 0.8 * $max_count) {
break;
}
$matches[$offset] = bit_error_rate($fp1, $fp2, $offset);
}
arsort($matches);
foreach ($matches as $offset => $score) {
$time_offset = $offset * 0.1238; // each fingerprint item represents 0.1238 seconds
return array($score, $time_offset);
}
return array(0.0, 0);
}
function format_time($time) {
$sign = $time < 0 ? "-" : "";
$time = abs($time);
$secs = floor($time);
$msecs = ($time - $secs) * 1000;
return $sign . sprintf("%d:%02d.%d", $secs / 60, $secs % 60, $msecs);
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment