Last active
August 29, 2015 14:16
-
-
Save uda/1720c73ea2f06a7a774d to your computer and use it in GitHub Desktop.
Transcode (translate) a UTF-8 string whose characters belong to one Windows code page into the corresponding characters of another Windows code page.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env php | |
<?php | |
/**
 * Translate a string, given as an array of characters, from one character map to another.
 *
 * Each character is looked up in $sourceMap. When it is found and $targetMap holds a
 * non-empty entry at the same position, that entry is emitted in its place. Spaces and
 * characters without a usable counterpart are copied through unchanged.
 *
 * @param array $sourceStringArray Characters of the input string, in order.
 * @param array $sourceMap         Characters of the source map, indexed by position.
 * @param array $targetMap         Characters of the target map, indexed by position.
 *
 * @return string The translated string ('' when there is nothing to translate).
 */
function transCode($sourceStringArray, $sourceMap, $targetMap) {
    // Nothing to translate; also avoids iterating over a non-array value.
    if (empty($sourceStringArray)) {
        return '';
    }

    $newString = [];
    foreach ($sourceStringArray as $char) {
        // Spaces are never translated, even if a map happens to contain one.
        if ($char === ' ') {
            $newString[] = $char;
            continue;
        }

        // Strict search: a loose comparison would treat numeric-looking strings
        // such as '10' and '1e1' as the same character.
        $position = array_search($char, $sourceMap, true);

        // Compare against '' explicitly instead of using empty(), which would
        // also discard a perfectly valid target character '0'.
        $hasTarget = $position !== false
            && isset($targetMap[$position])
            && $targetMap[$position] !== '';

        $newString[] = $hasTarget ? $targetMap[$position] : $char;
    }

    return implode($newString);
}
/**
 * Split a UTF-8 string into an array of its individual characters.
 *
 * @param string $string The UTF-8 encoded string to split.
 *
 * @return array One element per character, in order; an empty array for an empty string.
 */
function mbStringToArray($string) {
    $string = (string) $string;

    // Initialised up front so an empty string yields [] rather than an undefined variable.
    $array = [];

    // Use the same explicit encoding for measuring and slicing, instead of
    // depending on the configured internal encoding for one of the two.
    $length = mb_strlen($string, 'UTF-8');
    for ($offset = 0; $offset < $length; $offset++) {
        $array[] = mb_substr($string, $offset, 1, 'UTF-8');
    }

    return $array;
}
// Character maps used for transcoding, keyed by Windows code page name. A key
// with the `_up` suffix is the companion map for the same code page (mostly the
// upper-case forms in the Latin, Cyrillic and Greek maps).
// Each value is a plain string; the script splits it into characters with
// mbStringToArray() and translates by position, so the character at index N of
// the source map is replaced by the character at index N of the target map.
// NOTE(review): the strings presumably list the characters at bytes 0xE0-0xFF
// (0xC0-0xDF for `_up`) of each code page - confirm against the code page tables.
// NOTE(review): the maps are not all the same length (e.g. '1255' is shorter
// than '1250'); positions past the end of a target map are left untranslated.
// PHP stores integer-like string keys such as '1250' as int keys, while
// '1250_up' remains a string key.
$enc = [ | |
'1250' => 'ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙', | |
'1250_up' => 'ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß', | |
'1251' => 'абвгдежзийклмнопрстуфхцчшщъыьэюя', | |
'1251_up' => 'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ', | |
'1252' => 'àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', | |
'1252_up' => 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß', | |
'1253' => 'ΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ ', | |
'1253_up' => 'ΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ ΣΤΥΦΧΨΩΪΫάέήί', | |
'1254' => 'àáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ', | |
'1254_up' => 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖ×ØÙÚÛÜİŞß', | |
'1255' => 'אבגדהוזחטיךכלםמןנסעףפץצקרשת', | |
'1255_up' => 'ְֱֲֳִֵֶַָֹֺֻּֽ־ֿ׀ׁׂ׃װױײ׳״ ', | |
// TODO add hebrew up and 1256-8 | |
]; | |
// ---------------------------------------------------------------------------
// Command-line entry point.
//
// Usage: <script> [--test] <string> [<source encoding>] [<target encoding>]
//
// Without --test, <string> is translated from the source map (default 1250)
// to the target map (default 1255) and printed.
// With --test, <string> is translated once per map in $enc other than the
// source encoding, each map being tried as the source, one result per line.
// ---------------------------------------------------------------------------

// Pull the optional --test flag out of the arguments, then reindex so the
// positional arguments keep their expected slots wherever the flag appeared
// (a bare unset() would leave a hole, e.g. no $argv[1] for `--test "text"`).
$flagIndex = array_search('--test', $argv, true);
$test = $flagIndex !== false;
if ($test) {
    unset($argv[$flagIndex]);
    $argv = array_values($argv);
    $argc = count($argv);
}

// Checked after the flag is removed so that `<script> --test` alone is rejected too.
if ($argc < 2) {
    fwrite(STDERR, 'You need to provide a string' . PHP_EOL);
    exit(1);
}

$sourceString = $argv[1];
$sourceStringArray = mbStringToArray($sourceString);

// Any supplied name is accepted and validated against $enc below. The previous
// `intval($x) == $x` guard changed meaning between PHP 7 and PHP 8 and could
// silently fall back to the default for names such as "1250_up".
$sourceEncoding = $argc > 2 ? $argv[2] : '1250';
if (!isset($enc[$sourceEncoding])) {
    fwrite(STDERR, 'No such encoding: ' . $sourceEncoding . PHP_EOL);
    exit(1);
}
$sourceEncodingArray = mbStringToArray($enc[$sourceEncoding]);

$targetEncoding = $argc > 3 ? $argv[3] : '1255';
if (!isset($enc[$targetEncoding])) {
    fwrite(STDERR, 'No such encoding: ' . $targetEncoding . PHP_EOL);
    exit(1);
}
$targetEncodingArray = mbStringToArray($enc[$targetEncoding]);

if ($test) {
    foreach ($enc as $code => $mapString) {
        // Compare as strings: integer-like keys come back from PHP as ints, and a
        // loose int/string comparison could also match "1250_up" against 1250.
        if ((string) $code === (string) $sourceEncoding) {
            continue;
        }
        echo str_pad($code . ':', 10, ' ', STR_PAD_RIGHT),
            transCode($sourceStringArray, mbStringToArray($mapString), $targetEncodingArray),
            "\n";
    }
} else {
    echo transCode($sourceStringArray, $sourceEncodingArray, $targetEncodingArray), "\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment