Skip to content

Instantly share code, notes, and snippets.

@muglug
Last active November 15, 2018 02:37
Show Gist options
  • Save muglug/02d95a8418265f283e3baf73b5e0b00b to your computer and use it in GitHub Desktop.
Save muglug/02d95a8418265f283e3baf73b5e0b00b to your computer and use it in GitHub Desktop.
String anonymisation
<?php
/**
 * Anonymises a string by replacing every letter and digit with a random
 * character of the same kind, leaving everything else untouched.
 *
 * - ASCII uppercase letters, lowercase letters and digits are each replaced
 *   by a random character from the same class (A-Z, a-z, 0-9).
 * - Non-ASCII letters (Unicode property \p{L}) are replaced by a random
 *   letter at most 10 code points away. Uppercase letters (\p{Lu}) stay
 *   uppercase and all other letters stay non-uppercase. If no suitable
 *   neighbour is drawn within 10 attempts, the original character is kept.
 * - Every other character, and any byte that is not part of a well-formed
 *   UTF-8 sequence, is copied through unchanged.
 *
 * Randomness comes from mt_rand(): output can be made reproducible with
 * mt_srand(), but it is not cryptographically secure, and a character may
 * occasionally be replaced by itself.
 *
 * @param string $s UTF-8 encoded input.
 * @return string The anonymised string. For valid UTF-8 input it has the same
 *                number of characters as $s (the byte length may differ).
 */
function anonymize_letters($s) {
    $s = (string) $s;
    $result = '';
    $length = strlen($s);

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($s[$i]);

        if ($byte < 0x80) {
            // Plain ASCII: pick a random member of the same character class.
            if ($byte >= 0x41 && $byte <= 0x5A) {        // A-Z
                $result .= chr(0x41 + \mt_rand(0, 25));
            } elseif ($byte >= 0x61 && $byte <= 0x7A) {  // a-z
                $result .= chr(0x61 + \mt_rand(0, 25));
            } elseif ($byte >= 0x30 && $byte <= 0x39) {  // 0-9
                $result .= chr(0x30 + \mt_rand(0, 9));
            } else {
                $result .= $s[$i];
            }
            continue;
        }

        // Width of the UTF-8 sequence announced by the lead byte. Stray
        // continuation bytes and invalid lead bytes get width 1 so they are
        // passed through on their own instead of swallowing their neighbours.
        if ($byte >= 0xC2 && $byte <= 0xDF) {
            $width = 2;
        } elseif ($byte >= 0xE0 && $byte <= 0xEF) {
            $width = 3;
        } elseif ($byte >= 0xF0 && $byte <= 0xF4) {
            $width = 4;
        } else {
            $width = 1;
        }

        // substr() never reads past the end, so a truncated trailing sequence
        // simply comes back shorter than $width and fails the check below.
        $char = substr($s, $i, $width);
        if ($width === 1 || strlen($char) !== $width || !mb_check_encoding($char, 'UTF-8')) {
            $result .= $s[$i];
            continue;
        }
        $i += $width - 1;

        // The "u" modifier is required: without it \p{L} is tested against
        // individual bytes and nearly every multi-byte character "matches".
        if (preg_match('/^\p{L}$/u', $char) !== 1) {
            $result .= $char;
            continue;
        }

        $isUpper = preg_match('/^\p{Lu}$/u', $char) === 1;
        $codepoint = mb_ord($char, 'UTF-8');
        $replacement = $char; // fallback if no suitable neighbour is found

        for ($attempt = 0; $attempt < 10; $attempt++) {
            // Look for a letter close by so the result stays in the same script.
            $candidate = mb_chr($codepoint + \mt_rand(-10, 10), 'UTF-8');
            if ($candidate === false) {
                continue; // surrogate or out-of-range code point
            }
            if (preg_match('/^\p{L}$/u', $candidate) === 1
                && (preg_match('/^\p{Lu}$/u', $candidate) === 1) === $isUpper
            ) {
                $replacement = $candidate;
                break;
            }
        }

        $result .= $replacement;
    }

    return $result;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment