Skip to content

Instantly share code, notes, and snippets.

@bdw
Created January 7, 2013 21:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bdw/4478770 to your computer and use it in GitHub Desktop.
Save bdw/4478770 to your computer and use it in GitHub Desktop.
PHP on-disk dictionary. It stores a key-value hash as a binary tree (depth-first), with a 'right offset pointer'. A long time ago, I had methods that manipulated this on-disk, but unfortunately, I don't anymore. I will figure out how, though, and rewrite this into a more honest language.
<?php
class Dictionary {
var $file;
var $index;
var $current;
function Dictionary($file) {
$this->file = $file;
$this->index = array();
}
function store() {
$fd = fopen($this->file, 'w');
$k = array_keys($this->index);
sort($k);
$tree = $this->write_node(0, sizeof($k) , $k);
fwrite($fd, $tree);
fclose($fd);
$this->current = true;
}
function write_node($l, $u, $k) {
if($l < $u) {
$m = ($l + $u) >> 1;
$left = $this->write_node($l, $m, $k);
$right = $this->write_node($m + 1, $u, $k);
$record = $this->write_record($k[$m], strlen($left));
return $record . $left . $right;
}
return '';
}
function write_record($word, $bytes) {
return sprintf("%02X %02X %08X %s %s\n", strlen($word),
strlen($this->index[$word]), $bytes,
$word, $this->index[$word]);
}
function read_record($fd) {
$head = fread($fd, 15);
if(strlen($head) != 15) return null;
$vars = sscanf($head, '%02X %02X %08X ');
$data = fread($fd, $vars[0] + $vars[1] + 2);
return array(substr($data, 0, $vars[0]),
substr($data, $vars[0]+1, -1), $vars[2]);
}
function load() {
$fd = fopen($this->file, 'r');
$this->index = array();
while(!feof($fd)) {
$result = $this->read_record($fd);
if(empty($result)) break;
list($word, $data, $next) = $result;
$this->index[$word] = $data;
}
fclose($fd);
$this->current = true;
}
function unload() {
$this->index = array();
$this->current = false;
}
function find($word) {
if($this->current)
return $this->index[$word];
$fd = fopen($this->file,'r');
$data = null;
while(!feof($fd)) {
$record = $this->read_record($fd);
if(empty($record)) break;
$c = strcmp($record[0], $word);
if($c == 0) { $data = $record[1]; break; }
else if($c > 0) continue;
else if($record[2] == 0) break;
else if($c < 0) fseek($fd, $record[2], SEEK_CUR);
}
fclose($fd);
return $data;
}
function add($word, $data) {
$this->index[$word] = $data;
}
function find_many($words) {
$fd = fopen($this->file, 'r');
$data = $this->seek_from($fd, 0, $words);
fclose($fd);
return $data;
}
/* There is a bug in this function and
that is why I compensate by searching
with regular search afterwards. */
function seek_from($fd, $pos, $words) {
if($pos != 0)
fseek($fd, $pos, SEEK_SET);
$data = array();
if(empty($words)) return $data;
$rec = $this->read_record($fd);
$pos = ftell($fd);
list($left, $right, $found) = $this->divide_words($rec[0], $words);
if($found)
$data[$found] = $rec[1];
if(!empty($left))
$data = array_merge($data, $this->seek_from($fd, 0, $left));
if(!empty($right) && $rec[2] != 0)
$data = array_merge($data, $this->seek_from($fd, $pos + $rec[2], $right));
return $data;
}
function divide_words($c, $words) {
$a = array();
$b = array();
foreach($words as $i => $w) {
$t = strcmp($c, $w);
if($t == 0) $f = $w;
else if($t < 0) $b[] = $w;
else $a[] = $w;
}
return array($a, $b, $f);
}
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment