Skip to content

Instantly share code, notes, and snippets.

@motin
Created July 18, 2016 07:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save motin/a65e6c1cc303e46900d10894bf2da87f to your computer and use it in GitHub Desktop.
Save motin/a65e6c1cc303e46900d10894bf2da87f to your computer and use it in GitHub Desktop.
<?php
use Codeception\Util\Stub;
/**
 * Data-driven checks that URLify transliterates accented and other non-ASCII
 * Latin characters into the expected ASCII replacements.
 *
 * The fixtures are gathered from several sources (linked inside
 * transliterationDataProvider()). Where an expectation was changed from the
 * upstream value, the upstream value is kept next to it in an `// original`
 * comment.
 *
 * Three alternative implementations (RemoveDiacritics, intl Normalizer and
 * iconv) are kept as disabled tests at the relevant places in the class.
 */
class StringTransliterationTest extends \Codeception\TestCase\Test
{
    use DnaTestTrait;

    /**
     * @var \CodeGuy
     */
    protected $codeGuy;

    /**
     * Per-test set-up hook; intentionally empty.
     */
    protected function _before()
    {
    }

    /**
     * Per-test tear-down hook; intentionally empty.
     */
    protected function _after()
    {
    }

    /**
     * Supplies [input, expected transliteration] pairs.
     *
     * @return array list of two-element arrays: the raw string and the ASCII
     *               string the transliteration is expected to produce
     */
    public static function transliterationDataProvider()
    {
        // Presumably taken from Propel2's test suite (no source link recorded).
        $propel2TestData = [
            ['foo', 'foo'],
            ['fôo', 'foo'],
            ['€', 'EUR'],
            [
                'CŠŒŽšœžŸµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝßàáâãäåæçèéêëìíîïñòóôõöùúûüýÿ',
                'CSOEZsoezYuAAAAAAAECEEEEIIIINOOOOOUUUUYssaaaaaaaeceeeeiiiinooooouuuuyy',
            ],
            ['ø', 'o'],
            ['Ø', 'O'],
            ['¥Ðð', 'YDd'],
        ];

        // Adapted from https://raw.githubusercontent.com/lingtalfi/Bat/master/btests/StringTool/removeAccents/stringTool.removeAccents.test.php
        // Written as explicit pairs so every input sits next to its expectation.
        $batStringToolTestData = [
            // easy
            ['', ''],
            ['a', 'a'],
            ['après', 'apres'],
            ['dédé fait la fête ?', 'dede fait la fete ?'],
            // hard
            [
                'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ',
                'aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY',
            ],
            ['ŻŹĆŃĄŚŁĘÓżźćńąśłęó', 'ZZCNASLEOzzcnasleo'],
            ['qqqqŻŹĆŃĄŚŁĘÓżźćńąśłęóqqq', 'qqqqZZCNASLEOzzcnasleoqqq'],
            [
                'ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïðñòóôõöøùúûüýÿ',
                //'SZszYAAAAAACEEEEIIIIDNOOOOOOUUUUYaaaaaaceeeeiiiionoooooouuuuyy', // original
                'SZszYAAAAAACEEEEIIIIDNOOOOOOUUUUYaaaaaaceeeeiiiidnoooooouuuuyy',
            ],
            [
                'ÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöøùúûüýÿ',
                'AAAAAACEEEEIIIIDNOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy',
            ],
            [
                'ĀāĂăĄąĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİĴĵĶķ',
                //'AaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIJjKk', // original
                'AaAaAaCcCcCcCcDdDjdjEeEeEeEeEeGgGgGgGgHhHhIiiiIiIiIJjkk',
            ],
            [
                'ĹĺĻļĽľĿŀŁłŃńŅņŇňŉŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽž',
                'LlLlLlLlLlNnNnNnnOoOoOoRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZz',
            ],
            ['ſƒƠơƯưǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǺǻǾǿ', 'ifOoUuAaIiOoUuUuUuUuUuAaOo'],
            [
                'Ǽǽ',
                //'Aa', // original
                'Aeae',
            ],
        ];

        // https://github.com/infralabs/DiacriticsRemovePHP/blob/master/test_SpecialCharacters_to_Latin.php
        $removeDiaCriticsTestData = [
            // Latin-1 Supplement
            [
                "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ",
                "AAAAAAAECEEEEIIIIDNOOOOO×OUUUUYTHssaaaaaaaeceeeeiiiidnooooo÷ouuuuythy",
            ],
            // Latin Extended-A
            [
                "ĀāĂăĄąĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıĲĳĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňŉŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſ",
                //"AaAaAaCcCcCcCcDdDdEeEeEeEeEeGgGgGgGgHhHhIiIiIiIiIiIJijJjKkĸLlLlLlLlLlNnNnNnnNnOoOoOoOEoeRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzs" // original
                "AaAaAaCcCcCcCcDdDjdjEeEeEeEeEeGgGgGgGgHhHhIiiiIiIiIiIJijJjkkkLlLlLlLlLlNnNnNnnNnOoOoOoOEoeRrRrRrSsSsSsSsTtTtTtUuUuUuUuUuUuWwYyYZzZzZzi",
            ],
            // Latin Extended-B
            [
                "ƒǺǻǼǽǾǿ",
                //"fAaAEaeOo" // original
                "fAaAeaeOo",
            ],
            // Latin Extended Additional
            [
                "ẀẁẂẃẄẅỲỳ",
                "WwWwWwYy",
            ],
        ];

        return array_merge(
            $propel2TestData,
            $batStringToolTestData,
            $removeDiaCriticsTestData
        );
    }

    /*
     * Disabled alternative: transliteration via RemoveDiacritics.
     * Annotations to restore when re-enabling:
     *   @group coverage:full
     *   @dataProvider transliterationDataProvider

    public function testTransliterationUsingRemoveDiacritics($in, $out)
    {
        $translit = RemoveDiacritics::process($in);
        $this->assertEquals($out, $translit, 'RemoveDiacritics behaves as expected');
    }
    */

    /**
     * Transliterates $in with URLify, after registering the extra character
     * map, and compares the result with $out.
     *
     * The extra characters are registered on every invocation, i.e. once per
     * data set.
     *
     * @group coverage:full
     * @dataProvider transliterationDataProvider
     *
     * @param string $in  raw input string
     * @param string $out expected ASCII transliteration
     */
    public function testTransliterationUsingDjangosUrlify($in, $out)
    {
        URLify::add_chars(self::extraUrlifyCharacterMap());

        $translit = URLify::transliterate($in);

        // assertSame: both sides are strings, so compare strictly.
        $this->assertSame($out, $translit, 'djangos urlify transliteration behaves as expected');
    }

    /**
     * Character replacements registered with URLify in addition to its own
     * maps so that the fixtures in transliterationDataProvider() pass.
     *
     * @return array map of a single character to its ASCII replacement string
     */
    private static function extraUrlifyCharacterMap()
    {
        return [
            '¿' => '?',
            '®' => '(r)',
            '¼' => '1-4',
            '½' => '1-2',
            '¾' => '3-4',
            '¶' => 'P',
            '€' => 'EUR',
            // TODO: Merge upstream:
            'Ÿ' => 'Y',
            'µ' => 'u',
            '¥' => 'Y',
            'Ĉ' => 'C',
            'ĉ' => 'c',
            'Ċ' => 'C',
            'ċ' => 'c',
            'Ĝ' => 'G',
            'ĝ' => 'g',
            'Ġ' => 'G',
            'ġ' => 'g',
            'Ĥ' => 'H',
            'ĥ' => 'h',
            'Ħ' => 'H',
            'ħ' => 'h',
            'Ĕ' => 'E',
            'ĕ' => 'e',
            'Ĭ' => 'I',
            'ĭ' => 'i',
            'Ĵ' => 'J',
            'ĵ' => 'j',
            'Ĺ' => 'L',
            'ĺ' => 'l',
            'Ľ' => 'L',
            'ľ' => 'l',
            'Ŀ' => 'L',
            'ŀ' => 'l',
            'ŉ' => 'n',
            'Ō' => 'O',
            'ō' => 'o',
            'Ŏ' => 'O',
            'ŏ' => 'o',
            'Ŕ' => 'R',
            'ŕ' => 'r',
            'Ŗ' => 'R',
            'ŗ' => 'r',
            'Ŝ' => 'S',
            'ŝ' => 's',
            'Ŧ' => 'T',
            'ŧ' => 't',
            'Ŭ' => 'U',
            'ŭ' => 'u',
            'Ŵ' => 'W',
            'ŵ' => 'w',
            'Ŷ' => 'Y',
            'ŷ' => 'y',
            // Long s is mapped to 'i' to match the fixtures; the upstream
            // DiacriticsRemovePHP expectation ("original") ends in 's'.
            'ſ' => 'i',
            'ƒ' => 'f',
            // NOTE(review): the next four entries map plain ASCII letters to
            // themselves. They sit where the fixture 'ſƒƠơƯư…' would suggest
            // Ơ/ơ/Ư/ư, so the keys may have been damaged — confirm.
            'O' => 'O',
            'o' => 'o',
            'U' => 'U',
            'u' => 'u',
            'Ǎ' => 'A',
            'ǎ' => 'a',
            'Ǐ' => 'I',
            'ǐ' => 'i',
            'Ǒ' => 'O',
            'ǒ' => 'o',
            'Ǔ' => 'U',
            'ǔ' => 'u',
            'Ǖ' => 'U',
            'ǖ' => 'u',
            'Ǘ' => 'U',
            'ǘ' => 'u',
            'Ǚ' => 'U',
            'ǚ' => 'u',
            'Ǜ' => 'U',
            'ǜ' => 'u',
            'Ǻ' => 'A',
            'ǻ' => 'a',
            'Ǿ' => 'O',
            'ǿ' => 'o',
            'Ǽ' => 'Ae',
            'ǽ' => 'ae',
            'Ĳ' => 'IJ',
            'ĳ' => 'ij',
            // NOTE(review): identity mapping for ASCII 'J'; the intended key
            // is unclear — confirm or remove.
            'J' => 'J',
            'ĸ' => 'k',
            'Ŋ' => 'N',
            'ŋ' => 'n',
            'Ẁ' => 'W',
            'ẁ' => 'w',
            'Ẃ' => 'W',
            'ẃ' => 'w',
            'Ẅ' => 'W',
            'ẅ' => 'w',
        ];
    }

    /*
     * Disabled alternative: transliteration via the intl Normalizer.
     * Annotations to restore when re-enabling:
     *   @group coverage:full
     *   @dataProvider transliterationDataProvider

    public function testTransliterationUsingPHPNormalizer($in, $out)
    {
        $this->assertTrue(extension_loaded('intl'));
        $translit = Normalizer::normalize($in, Normalizer::FORM_KD);
        $this->assertEquals($out, $translit, 'php\'s normalizer transliterates as expected');
    }
    */

    /*
     * Disabled alternative: transliteration via iconv //TRANSLIT.
     * Annotations to restore when re-enabling:
     *   @group coverage:full
     *   @dataProvider transliterationDataProvider

    public function testTransliterationUsingIconv($in, $out)
    {
        if (!function_exists('iconv')) {
            $this->markTestSkipped();
        }
        $translit = iconv('utf-8', 'us-ascii//TRANSLIT', $in);
        $this->assertEquals($out, $translit, 'iconv transliteration behaves as expected');
    }
    */
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment