Skip to content

Instantly share code, notes, and snippets.

@cod1ingcoding
Created July 11, 2014 01:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cod1ingcoding/1455b7809749b30ed54b to your computer and use it in GitHub Desktop.
Save cod1ingcoding/1455b7809749b30ed54b to your computer and use it in GitHub Desktop.
<?php
/**
 * Guess the encoding of a byte string: 'utf-8', 'gbk' or 'ascii'.
 *
 * The string is scanned byte by byte. It is reported as ASCII when no byte
 * is above 0x7F, as UTF-8 when every non-ASCII byte belongs to a lead byte
 * (0xC2-0xF4) followed by the right number of continuation bytes
 * (0x80-0xBF), and as GBK otherwise. A leading UTF-8 byte order mark
 * (EF BB BF) short-circuits the scan and yields 'utf-8'.
 *
 * The UTF-8 check is structural only: overlong forms, surrogates and code
 * points above U+10FFFF that still fit the lead/continuation pattern are
 * accepted.
 *
 * @author 华仔 2010/12/27
 *
 * @param string  $s            Text to test.
 * @param string  $priencoding  Preferred encoding. Returned when the text is
 *                              structurally valid UTF-8 but its only non-ASCII
 *                              content is a single two-byte sequence (too
 *                              little evidence, since such a pair also fits a
 *                              GBK double-byte character), and for pure-ASCII
 *                              input when $reserveascii is false.
 * @param boolean $reserveascii When true, pure-ASCII (including empty) input
 *                              is reported as 'ascii'; when false it is
 *                              reported as $priencoding.
 * @return string 'utf-8', 'gbk', 'ascii', or the value of $priencoding.
 */
function getencoding( $s, $priencoding='gbk', $reserveascii=true ) {
    $s   = (string) $s;
    $len = strlen( $s );

    // A UTF-8 byte order mark (EF BB BF) settles the question immediately.
    // The length guard keeps short and empty strings from being indexed.
    if ( $len >= 3 && strncmp( $s, "\xEF\xBB\xBF", 3 ) === 0 ) {
        return 'utf-8';
    }

    $isascii = true;
    $isutf8 = true;
    // Running total of continuation bytes consumed by valid UTF-8 sequences.
    $utf8_mode_times = 0;

    for ( $i = 0; $i < $len; $i++ ) {
        $byte = ord( $s[$i] );
        if ( $byte <= 0x7F ) {
            continue; // plain ASCII byte, tells us nothing
        }
        $isascii = false;

        // How many continuation bytes must follow this lead byte.
        if ( $byte >= 0xC2 && $byte <= 0xDF ) {
            $trail = 1;
        } elseif ( $byte >= 0xE0 && $byte <= 0xEF ) {
            $trail = 2;
        } elseif ( $byte >= 0xF0 && $byte <= 0xF4 ) {
            $trail = 3;
        } else {
            // 0x80-0xC1 (stray continuation / overlong lead) and 0xF5-0xFF
            // can never start a UTF-8 sequence.
            $trail = 0;
        }

        // The whole sequence must fit in the string and every following
        // byte must be a continuation byte (0x80-0xBF).
        $valid = $trail > 0 && $i + $trail < $len;
        for ( $j = 1; $valid && $j <= $trail; $j++ ) {
            $next  = ord( $s[$i + $j] );
            $valid = $next >= 0x80 && $next <= 0xBF;
        }

        if ( ! $valid ) {
            $isutf8 = false;
            break; // one invalid sequence is enough to rule out UTF-8
        }

        $utf8_mode_times += $trail;
        $i += $trail; // skip the continuation bytes just validated
    }

    if ( $isascii ) {
        return $reserveascii ? 'ascii' : $priencoding;
    }
    if ( ! $isutf8 ) {
        return 'gbk';
    }
    // Exactly one continuation byte in total means the only multi-byte
    // content was a single two-byte sequence: ambiguous, so defer to the
    // caller's preferred encoding.
    return $utf8_mode_times === 1 ? $priencoding : 'utf-8';
}
// Self-test. This file must be saved as UTF-8 so the literal below is UTF-8.
// Runs only when this file is the script being executed, not when it is
// included from another script. Both paths go through realpath() so that
// relative CLI invocations, symlinks and Windows back/forward slashes all
// compare equal; isset() avoids an undefined-index warning in environments
// that do not populate SCRIPT_FILENAME.
if ( isset( $_SERVER['SCRIPT_FILENAME'] )
    && realpath( $_SERVER['SCRIPT_FILENAME'] ) === realpath( __FILE__ )
) {
    $s = '测试代码,文件保存为utf-8格式';
    // The UTF-8 source literal as-is.
    echo getencoding( $s );
    echo "<br />";
    // The same text converted to GBK.
    echo getencoding( iconv( 'utf-8', 'gbk', $s ) );
    echo "<br />";
    $s = 'abcd';
    // Pure ASCII, reported as 'ascii' by default.
    echo getencoding( $s );
    echo "<br />";
    // Pure ASCII with $reserveascii disabled: reports the preferred encoding.
    echo getencoding( $s, 'gbk', false );
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment