Created
July 11, 2014 01:24
-
-
Save cod1ingcoding/1455b7809749b30ed54b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Guess the byte encoding of a string: 'utf-8', 'gbk' or 'ascii'.
 *
 * Original author: 华仔, 2010/12/27.
 *
 * Detection rules, in order:
 *   1. A leading UTF-8 byte order mark (EF BB BF) means 'utf-8'.
 *   2. If every byte is <= 0x7F the text is plain ASCII.
 *   3. Otherwise every byte > 0x7F must start a structurally valid UTF-8
 *      sequence (lead byte C2-DF / E0-EF / F0-F4 followed by 1 / 2 / 3
 *      continuation bytes in 80-BF). Any violation means 'gbk'.
 *   4. If the whole string is valid UTF-8 but contains exactly one two-byte
 *      sequence and nothing else above 0x7F, the evidence is too thin to
 *      decide and $priencoding is returned instead.
 *
 * @param string $s            Text to inspect.
 * @param string $priencoding  Encoding to prefer when the text is ambiguous
 *                             (see rule 4), and for ASCII input when
 *                             $reserveascii is false. Usually 'gbk'.
 * @param bool   $reserveascii When true, pure ASCII input is reported as
 *                             'ascii'; when false it is reported as
 *                             $priencoding.
 * @return string 'utf-8', 'gbk', 'ascii', or the value of $priencoding.
 */
function getencoding( $s, $priencoding='gbk', $reserveascii=true ) {
    // Rule 1: byte order mark. strncmp() is safe on strings shorter than 3 bytes.
    if ( strncmp( $s, "\xEF\xBB\xBF", 3 ) === 0 ) {
        return 'utf-8';
    }

    $len = strlen( $s );
    $isascii = true;
    $isutf8 = true;
    // Running total of continuation bytes consumed by valid sequences.
    $utf8_mode_times = 0;

    for ( $i = 0; $i < $len; $i++ ) {
        $byte = ord( $s[$i] );
        if ( $byte <= 0x7F ) {
            continue;
        }
        $isascii = false;

        // Number of continuation bytes the lead byte announces.
        if ( $byte >= 0xC2 && $byte <= 0xDF ) {
            $tail = 1;
        } elseif ( $byte >= 0xE0 && $byte <= 0xEF ) {
            $tail = 2;
        } elseif ( $byte >= 0xF0 && $byte <= 0xF4 ) {
            $tail = 3;
        } else {
            // Stray continuation byte (80-BF), overlong lead (C0/C1) or a
            // byte above F4: none of these may start a UTF-8 sequence.
            $tail = 0;
        }

        // The sequence must fit in the string and every trailing byte must
        // be a continuation byte.
        $valid = $tail > 0 && $i + $tail < $len;
        for ( $j = 1; $valid && $j <= $tail; $j++ ) {
            $next = ord( $s[$i + $j] );
            $valid = $next >= 0x80 && $next <= 0xBF;
        }

        if ( ! $valid ) {
            $isutf8 = false;
            break;
        }

        $utf8_mode_times += $tail;
        $i += $tail; // skip the continuation bytes just verified
    }

    if ( $isascii ) {
        return $reserveascii ? 'ascii' : $priencoding;
    }
    if ( ! $isutf8 ) {
        return 'gbk';
    }
    // A total of exactly one continuation byte means a single two-byte
    // sequence; treat that as ambiguous and fall back to the preferred encoding.
    return $utf8_mode_times == 1 ? $priencoding : 'utf-8';
}
// Self-test, run only when this file is the entry script. The file itself
// must be saved as UTF-8 for the first sample to be UTF-8 encoded.
if ( $_SERVER['SCRIPT_FILENAME'] == str_replace( '\\', '/', __FILE__ ) ) {
    // UTF-8 sample, then the same text converted to GBK.
    $s = '测试代码,文件保存为utf-8格式';
    echo getencoding( $s ), "<br />";
    echo getencoding( iconv( 'utf-8', 'gbk', $s ) ), "<br />";

    // ASCII sample: once reported as ascii, once as the preferred encoding.
    $s = 'abcd';
    echo getencoding( $s ), "<br />", getencoding( $s, 'gbk', false );
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment