Skip to content

Instantly share code, notes, and snippets.

@uda
Last active August 29, 2015 14:16
Show Gist options
  • Save uda/1720c73ea2f06a7a774d to your computer and use it in GitHub Desktop.
Save uda/1720c73ea2f06a7a774d to your computer and use it in GitHub Desktop.
Transcode a UTF-8 string: translate characters that belong to one Windows code page into the characters at the same positions in another Windows code page
#!/usr/bin/env php
<?php
/**
 * Translate a list of characters from one character table to another.
 *
 * Each input character is looked up in $sourceMap; if it is found at
 * position i and $targetMap has a non-empty entry at position i, that entry
 * is emitted instead. Characters with no match (or no target entry) and
 * spaces are copied through unchanged.
 *
 * @param array|null $sourceStringArray Characters of the input, one per element.
 * @param array      $sourceMap         Characters of the source table, one per element.
 * @param array      $targetMap         Characters of the target table, one per element.
 * @return string The translated characters joined into one string.
 */
function transCode($sourceStringArray, $sourceMap, $targetMap) {
    // Nothing to translate; this also covers a null input.
    if (!$sourceStringArray) {
        return '';
    }

    $newString = [];
    foreach ($sourceStringArray as $char) {
        // Spaces are never looked up in the tables; they pass straight through.
        if ($char === ' ') {
            $newString[] = $char;
            continue;
        }

        // Strict search: loose comparison would treat numeric strings such as
        // '1' and '01' as the same character.
        $sourceIndex = array_search($char, $sourceMap, true);

        // Handle "not found" explicitly instead of letting false be coerced
        // to offset 0, and compare against '' so that a '0' target is kept.
        if ($sourceIndex === false
            || !isset($targetMap[$sourceIndex])
            || $targetMap[$sourceIndex] === ''
        ) {
            $newString[] = $char;
            continue;
        }

        $newString[] = $targetMap[$sourceIndex];
    }

    return implode('', $newString);
}
/**
 * Split a UTF-8 string into an array of single characters.
 *
 * @param string $string UTF-8 encoded text.
 * @return array One-character strings in input order; an empty array for an
 *               empty string.
 */
function mbStringToArray($string) {
    $string = (string) $string;

    // Start from an empty list so an empty input yields [] instead of an
    // undefined variable.
    $chars = [];

    // Pass the encoding explicitly so the length agrees with mb_substr below
    // regardless of the configured internal encoding.
    $length = mb_strlen($string, 'UTF-8');
    for ($offset = 0; $offset < $length; $offset++) {
        $chars[] = mb_substr($string, $offset, 1, 'UTF-8');
    }

    return $chars;
}
// Character tables used for transcoding, keyed by Windows code-page number
// (optionally suffixed with "_up"). Each value is a string of characters;
// transcoding replaces the character at position i of the source row with
// the character at position i of the target row.
// NOTE(review): the unsuffixed rows look like bytes 0xE0-0xFF of each code
// page and the "_up" rows like bytes 0xC0-0xDF — confirm against the code-page
// charts. Spaces inside a row appear to be placeholders for positions that
// have no character — TODO confirm.
// Rows are not all the same length ('1255' holds only the 27 Hebrew letters);
// positions missing from the target row leave the input character unchanged.
$enc = [
'1250' => 'ŕáâăäĺćçčéęëěíîďđńňóôőö÷řůúűüýţ˙',
'1250_up' => 'ŔÁÂĂÄĹĆÇČÉĘËĚÍÎĎĐŃŇÓÔŐÖ×ŘŮÚŰÜÝŢß',
'1251' => 'абвгдежзийклмнопрстуфхцчшщъыьэюя',
'1251_up' => 'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ',
'1252' => 'àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ',
'1252_up' => 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß',
'1253' => 'ΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώ ',
'1253_up' => 'ΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡ ΣΤΥΦΧΨΩΪΫάέήί',
'1254' => 'àáâãäåæçèéêëìíîïğñòóôõö÷øùúûüışÿ',
'1254_up' => 'ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏĞÑÒÓÔÕÖ×ØÙÚÛÜİŞß',
'1255' => 'אבגדהוזחטיךכלםמןנסעףפץצקרשת',
'1255_up' => 'ְֱֲֳִֵֶַָֹֺֻּֽ־ֿ׀ׁׂ׃װױײ׳״ ',
// TODO add hebrew up and 1256-8
];
// Command-line entry point.
// Usage: script [--test] <string> [source-encoding] [target-encoding]
//   source-encoding defaults to 1250, target-encoding to 1255.
//   With --test, the string is transcoded once per table in $enc (except the
//   chosen source encoding), treating each table in turn as the source.

// Remove the --test flag wherever it appears and re-index the arguments, so
// the positional values are always at offsets 1, 2 and 3. A plain unset()
// would leave a hole (e.g. `script --test foo` would have no $argv[1]).
$test = in_array('--test', $argv, true);
if ($test) {
    $argv = array_values(array_diff($argv, ['--test']));
    $argc = count($argv);
}

// Checked after the flag is removed so that `script --test` alone is also
// reported as a missing string.
if ($argc < 2) {
    fwrite(STDERR, 'You need to provide a string' . PHP_EOL);
    exit(1);
}

$sourceString = $argv[1];
// Fall back to an empty list if the splitter returns nothing for an empty string.
$sourceStringArray = mbStringToArray($sourceString) ?: [];

// Any supplied encoding is used as given and validated against $enc. The
// previous `intval($x) == $x` guard relied on loose int/string comparison,
// whose result differs between PHP 7 and PHP 8, and it could silently
// discard a valid key such as '1250_up'.
$sourceEncoding = $argc > 2 ? $argv[2] : 1250;
if (!isset($enc[$sourceEncoding])) {
    fwrite(STDERR, 'No such encoding: ' . $sourceEncoding . PHP_EOL);
    exit(1);
}
$sourceEncodingArray = mbStringToArray($enc[$sourceEncoding]);

$targetEncoding = $argc > 3 ? $argv[3] : 1255;
if (!isset($enc[$targetEncoding])) {
    fwrite(STDERR, 'No such encoding: ' . $targetEncoding . PHP_EOL);
    exit(1);
}
$targetEncodingArray = mbStringToArray($enc[$targetEncoding]);

if ($test) {
    foreach ($enc as $code => $mapString) {
        // Numeric keys come back from PHP as integers; compare as strings so
        // that 1250 and '1250_up' are never considered equal.
        if ((string) $code === (string) $sourceEncoding) {
            continue;
        }
        echo str_pad($code . ':', 10, ' ', STR_PAD_RIGHT),
            transCode($sourceStringArray, mbStringToArray($mapString), $targetEncodingArray),
            "\n";
    }
} else {
    echo transCode($sourceStringArray, $sourceEncodingArray, $targetEncodingArray), "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment