Last active
June 17, 2018 22:46
-
-
Save timwhitlock/8255619 to your computer and use it in GitHub Desktop.
Testing Gettext MO hash table compilation in PHP
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compile this to abc.mo to run mo-hash.php
# $ msgfmt abc.po -o abc.mo
#
msgid ""
msgstr ""
"Project-Id-Version: Test\n"
"Language: English\n"
"Plural-Forms: nplurals=2; plural=n!=1\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit"
msgid "a"
msgstr "X"
msgid "b"
msgstr "Y"
msgid "c"
msgstr "Z"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* MO hash table test. | |
*/ | |
// Compile hash table from simple array including empty header key.
// compile_table() stores the 1-based position of each msgid in this array.
$msgids = array( '', 'a', 'b', 'c' );
$table = compile_table( $msgids );
echo "Hash table compiled by PHP: ";
print_binary( $table );

// Pull hash table from abc.mo compiled by gettext msgfmt
$mo = file_get_contents( 'abc.mo' );
if( false === $mo ){
    die('Cannot read abc.mo');
}
// The two header words read below end at byte offset 28.
if( strlen( $mo ) < 28 ){
    die('Bad MO file');
}

// The magic number is read as little-endian; seeing it byte-swapped means the
// file was written big-endian and every header word must be read with 'N'.
list(,$magic) = unpack( 'V', substr( $mo, 0, 4 ) );
if( 0x950412de === $magic ){
    $fmt = 'V*';
}
else if( 0xde120495 === $magic ){
    // Must be 'N*' (repeating): two words are unpacked below, a bare 'N'
    // would yield only the first and leave the table offset undefined.
    $fmt = 'N*';
}
else {
    die('Bad MO file');
}

// Bytes 20-27 of the header: number of hash table slots, then table offset.
list(,$s,$h) = unpack( $fmt, substr( $mo, 20, 8 ) );
$table = substr( $mo, $h, $s * 4 );
echo "Hash table from abc.mo: ";
print_binary( $table );
/**
 * Dump a binary string as hex: a byte count line, then rows of four
 * space-separated byte pairs, followed by a blank line.
 *
 * @param string $str raw bytes to display
 */
function print_binary( $str ){
    // Two hex digits per byte, joined by single spaces.
    $hex = implode( ' ', str_split( bin2hex( $str ), 2 ) );
    // 12 characters = four "xx " groups, i.e. one 32-bit word per row.
    $rows = str_split( $hex, 12 );
    $body = implode( "\n", $rows );
    echo strlen( $str ), " bytes\n", $body, "\n\n";
}
/**
 * Build hash table string from array of msgid strings. (From write-mo.c)
 * http://fossies.org/dox/gettext-0.18.3.1/write-mo_8c_source.html#l00384
 *
 * @param array $msgids msgid strings; each occupied slot records the 1-based
 *                      position of its msgid within this array
 * @return string one 32-bit little-endian word per slot, concatenated;
 *                unused slots are four NUL bytes
 */
function compile_table( array $msgids ){
    $j = 0;
    $n = count($msgids);
    $nul_word = "\0\0\0\0";
    // Truncate explicitly (integer division, as in the C original) so that
    // next_prime() never receives a fractional float.
    $hash_tab_size = max( 3, next_prime( (int) ( $n * 4 / 3 ) ) );
    $map = array_fill( 0, $hash_tab_size, $nul_word );
    foreach( $msgids as $msgid ){
        $hash_val = hash_string( $msgid );
        $idx = $hash_val % $hash_tab_size;
        // We need the second hashing function if another item already
        // occupies this slot.
        if( $nul_word !== $map[$idx] ){
            $incr = 1 + ( $hash_val % ( $hash_tab_size - 2 ) );
            do {
                // Step forward by $incr, wrapping around the end of the table.
                $diff = $hash_tab_size - $incr;
                if ( $idx >= $diff ) {
                    $idx -= $diff;
                }
                else {
                    $idx += $incr;
                }
            }
            while( $nul_word !== $map[$idx] );
        }
        $map[$idx] = pack( 'V', ++$j );
    }
    return implode( '', $map );
}
/**
 * hashpjw function by P.J. Weinberger. (From hash-string.c)
 * http://fossies.org/dox/gettext-0.18.3.1/hash-string_8c_source.html#l00031
 *
 * @param string $str bytes to hash
 * @return int hash value; the top four bits of the 32-bit word are folded
 *             back in and cleared after every byte
 */
function hash_string( $str ){
    $hval = 0;
    $len = strlen( $str );
    for( $i = 0; $i < $len; $i++ ){
        // Square-bracket offset: the curly-brace form $str{$i} was removed in PHP 8.
        $hval = ( $hval << 4 ) + ord( $str[$i] );
        // Isolate bits 28-31; when any are set, fold them into the low byte
        // region and then clear them.
        $g = $hval & 0xf0000000;
        if( $g !== 0 ){
            $hval ^= $g >> 24;
            $hval ^= $g;
        }
    }
    return $hval;
}
/**
 * Get next prime number up from seed number. (From hash.c)
 * http://fossies.org/dox/gettext-0.18.3.1/gettext-tools_2gnulib-lib_2hash_8c_source.html#l00073
 *
 * @param int|float $seed starting point; any fractional part is truncated
 * @return int the seed forced odd, or the first odd number above it that
 *             is_prime() accepts
 */
function next_prime( $seed ){
    // Cast explicitly before the bitwise OR: callers may pass the float result
    // of a division, and implicit fractional float-to-int conversion is deprecated.
    $candidate = ( (int) $seed ) | 1;
    // Only odd candidates are tested.
    while( ! is_prime( $candidate ) ){
        $candidate += 2;
    }
    return $candidate;
}
/**
 * Test if a given integer is a prime number.
 * http://icdif.com/computing/2011/09/15/check-number-prime-number/
 *
 * @param int|float $num value to test; truncated to an integer first
 * @return bool true when $num is prime; false for anything below 2
 */
function is_prime( $num ) {
    // Normalise so the strict comparisons below also work for float input.
    $num = (int) $num;
    // Zero, one and negative numbers are never prime.
    if( $num < 2 ){
        return false;
    }
    if( $num === 2 ){
        return true;
    }
    if( $num % 2 === 0 ) {
        return false;
    }
    // Trial division by odd numbers up to the square root.
    $max = ceil( sqrt( $num ) );
    for( $i = 3; $i <= $max; $i += 2 ) {
        if( $num % $i === 0 ){
            return false;
        }
    }
    return true;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment