Skip to content

Instantly share code, notes, and snippets.

@blackbing
Created August 17, 2011 06:34
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save blackbing/1150937 to your computer and use it in GitHub Desktop.
Save blackbing/1150937 to your computer and use it in GitHub Desktop.
為了解決中文筆劃排序的問題(只適用繁體中文)
1 a440 a441
2 a442 a453
3 a454 a47e
4 a4a1 a4fd
5 a4fe a5df
6 a5e0 a6e9
7 a6ea a8c2
8 a8c3 ab44
9 ab45 adbb
10 adbc b0ad
11 b0ae b3c2
12 b3c3 b6c3
13 b6c4 b9ab
14 b9ac bbf4
15 bbf5 bea6
16 bea7 c074
17 c075 c24e
18 c24f c35e
19 c35f c454
20 c455 c4d6
21 c4d7 c56a
22 c56b c5c7
23 c5c8 c5f0
24 c5f1 c654
25 c655 c664
26 c665 c66b
27 c66c c675
28 c676 c67a
29 c67b c67e
2 c940 c944
3 c945 c94c
4 c94d c95c
5 c95d c9aa
6 c9ab ca59
7 ca5a cbb0
8 cbb1 cddc
9 cddd d0c7
10 d0c8 d44a
11 d44b d850
12 d851 dcb0
13 dcb1 e0ef
14 e0f0 e4e5
15 e4e6 e8f3
16 e8f4 ecb8
17 ecb9 efb6
18 efb7 f1ea
19 f1eb f3fc
20 f3fd f5bf
21 f5c0 f6d5
22 f6d6 f7cf
23 f7d0 f8a4
24 f8a5 f8ed
25 f8e9 f96a
26 f96b f9a1
27 f9a2 f9b9
28 f9ba f9c5
29 f9c6 f9dc
9 f9da f9da
12 f9db f9db
13 f9d6 f9d8
15 f9dc f9dc
16 f9d9 f9d9
30 c67b c67d
30 f9cc f9cf
31 f9c6 f9c6
31 f9d0 f9d0
32 f9d1 f9d1
33 c67e c67e
33 f9d2 f9d2
34 f9d3 f9d3
36 f9d4 f9d5
<?php
/*
 * Author: blackbing@gmail.com
 * Desc: Sorts strings by the stroke count of their first Chinese character
 *       (Traditional Chinese only).
 * PHP code can call cht_strokesort() directly.
 * HTTP API: http://localhost/cht_strokesort.php?compare=%E9%A9%A2%E5%AD%90,%E5%8F%B2%E7%91%9E%E5%85%8B,%E7%99%BD%E9%9B%AA%E5%85%AC%E4%B8%BB,%E9%95%B7%E9%9D%B4%E8%B2%93,%E5%B0%8F%E6%9C%A8%E5%81%B6,%E8%96%91%E9%A4%85%E4%BA%BA,%E4%B8%80,%E4%BA%8C,%E4%B8%89,a,b,c,1,2,5,%E4%B8%83&callback=test&dont_sort=true
 * Note: parameters must be UTF-8 and encoded with encodeURIComponent.
 *
 * The full "<?php" open tag is used because the short "<?" form is only
 * recognised when the short_open_tag ini setting is enabled.
 */
define("BIG5_HB_MIN" , 0x81); // high (lead) byte: minimum value
define("BIG5_HB_MAX" , 0xfe); // high (lead) byte: maximum value
define("BIG5_LB1_MIN" , 0x40); // low (trail) byte, first range: minimum value
define("BIG5_LB1_MAX" , 0x7e); // low (trail) byte, first range: maximum value
define("BIG5_LB2_MIN" , 0xa1); // low (trail) byte, second range: minimum value
define("BIG5_LB2_MAX" , 0xfe); // low (trail) byte, second range: maximum value
/**
 * Tells whether the first byte of $c lies in the Big5 high (lead) byte range
 * BIG5_HB_MIN..BIG5_HB_MAX, inclusive.
 *
 * @param string $c Only the first byte is inspected.
 * @return bool
 */
function big5_isHB($c) {
    $byte = ord($c);
    // Inside the range means "not below the minimum and not above the maximum".
    return !($byte < BIG5_HB_MIN || $byte > BIG5_HB_MAX);
}
/**
 * Tells whether the first byte of $c lies in one of the two Big5 low (trail)
 * byte ranges: BIG5_LB1_MIN..BIG5_LB1_MAX or BIG5_LB2_MIN..BIG5_LB2_MAX.
 *
 * @param string $c Only the first byte is inspected.
 * @return bool
 */
function big5_isLB($c) {
    $byte = ord($c);
    $inFirstRange  = ($byte >= BIG5_LB1_MIN && $byte <= BIG5_LB1_MAX);
    $inSecondRange = ($byte >= BIG5_LB2_MIN && $byte <= BIG5_LB2_MAX);
    return $inFirstRange || $inSecondRange;
}
/**
 * Converts a UTF-8 string to Big5 one character at a time.
 *
 * ASCII bytes are copied through unchanged. Each multi-byte UTF-8 sequence is
 * converted with iconv(); when iconv() cannot convert it, the character is
 * emitted as an HTML entity instead. Bytes that cannot start a UTF-8 sequence
 * (stray continuation bytes 0x80-0xBF and lead bytes >= 0xF8) are dropped.
 *
 * @param string $utf8_str UTF-8 encoded input.
 * @return string Big5 bytes mixed with ASCII and HTML entities for
 *                characters that have no Big5 mapping.
 */
function utf8_2_big5($utf8_str) {
    $len = strlen($utf8_str);
    $big5_str = "";
    for ($i = 0; $i < $len; $i++) {
        $lead = ord($utf8_str[$i]);

        if ($lead < 128) {
            // Plain ASCII: identical in Big5.
            $big5_str .= $utf8_str[$i];
            continue;
        }

        // The lead byte determines how many bytes the sequence occupies.
        if ($lead > 191 && $lead < 224) {
            $width = 2;
        } else if ($lead > 223 && $lead < 240) {
            $width = 3;
        } else if ($lead > 239 && $lead < 248) {
            $width = 4;
        } else {
            // Not a valid sequence start; skipped.
            continue;
        }

        $char = substr($utf8_str, $i, $width);

        // iconv() raises a notice and returns false for characters without a
        // Big5 mapping; that case is handled explicitly just below, so the
        // notice is suppressed to keep it out of the HTTP response.
        $new_word = @iconv("UTF-8", "Big5", $char);

        if ($new_word === false || $new_word === "") {
            // Fall back to an HTML entity for exactly this character. The
            // slice uses the real sequence width: a fixed 3-byte slice would
            // swallow the following byte after a 2-byte character and would
            // truncate a 4-byte character.
            // NOTE(review): the 'HTML-ENTITIES' encoding is deprecated as of
            // PHP 8.2; it is kept so the fallback output stays the same.
            $new_word = mb_convert_encoding($char, 'HTML-ENTITIES', 'UTF-8');
        }
        $big5_str .= $new_word;

        // The loop's own $i++ accounts for the remaining byte.
        $i += $width - 1;
    }
    return $big5_str;
}
/**
 * Looks up the stroke count of a Big5-encoded Chinese character.
 *
 * The mapping is read from ./big5_stroke.tab (relative to the current working
 * directory). Each usable line holds three whitespace-separated fields: the
 * stroke count, then the first and last Big5 code of a range in hexadecimal.
 * The first range containing the character wins. The parsed table is cached
 * for the rest of the request so the file is read only once.
 *
 * If the table cannot be read (or is empty) an error message is printed and
 * the script terminates.
 *
 * @param string $str Big5 bytes; only the first two bytes are examined.
 * @return string|false The stroke count exactly as written in the table, or
 *                      false when the first two bytes are not a Big5
 *                      lead/trail pair or no range contains them.
 */
function big5_stroke($str)
{
    static $StrokeMapping = null;

    if ($StrokeMapping === null) {
        $tab = @file("./big5_stroke.tab", FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if (!$tab)
        {
            // NOTE(review): BIG5_FILE_DIR is not defined anywhere in the
            // visible source; the message is kept as-is for compatibility.
            echo "Can't Open file big5_stroke.tab, plz check define BIG5_FILE_DIR is valid";
            exit;
        }

        /* Parse the conversion table into $StrokeMapping. */
        $StrokeMapping = array();
        foreach ($tab as $line) {
            $fields = preg_split('/\s+/', trim($line));
            if (count($fields) < 3) {
                // Malformed row: ignore it rather than build a bogus range.
                continue;
            }
            $StrokeMapping[] = array($fields[0], hexdec($fields[1]), hexdec($fields[2]));
        }
    }

    $s1 = substr($str, 0, 1);
    $s2 = substr($str, 1, 1);
    if (!big5_isHB($s1) || !big5_isLB($s2)) {
        return false;
    }

    // Combine lead and trail byte into the 16-bit Big5 code used by the table.
    $s = (ord($s1) << 8) | ord($s2);
    foreach ($StrokeMapping as $range) {
        if ($range[1] <= $s && $range[2] >= $s) {
            return $range[0];
        }
    }

    // Valid Big5 pair, but not covered by any range in the table.
    return false;
}
/**
 * Returns the stroke count for the leading character of a UTF-8 string by
 * converting it with utf8_2_big5() and passing the result to big5_stroke().
 *
 * @param string $str UTF-8 input.
 * @return mixed Whatever big5_stroke() returns for the converted string.
 */
function get_string_stroke($str){
    return big5_stroke(utf8_2_big5($str));
}
/**
 * usort() comparator that orders entries by their 'ord' key, ascending.
 *
 * @param array $a Entry with an 'ord' key.
 * @param array $b Entry with an 'ord' key.
 * @return int -1, 0 or 1.
 */
function ucompare($a, $b){
    $left = $a['ord'];
    $right = $b['ord'];
    if ($left == $right) {
        return 0;
    }
    if ($left < $right) {
        return -1;
    }
    return 1;
}
/**
 * Builds a sort record for every string and, unless told otherwise, orders
 * the records by the stroke count of each string's first character.
 *
 * Every input value is URL-decoded first. When get_string_stroke() yields a
 * positive stroke count for the first character, the sort key is
 * 50000 + strokes; otherwise (letters, digits, anything without a stroke
 * count) the sort key is the byte value of the first character, so those
 * entries sort ahead of all stroke-counted ones.
 *
 * @param array $str_arr  Strings to rank; keys are preserved in 'original_index'.
 * @param bool  $dontSort When true, records are returned in input order.
 * @return array List of records with the keys 'firstChar', 'stroke'
 *               (-1 when no stroke count was found), 'ord' (the sort key),
 *               'original_index' and 'string' (the URL-decoded value).
 */
function cht_strokesort($str_arr, $dontSort = false){
    $ord_arr = array();
    // Alphanumerics are ordered by their byte value; stroke-based keys start
    // at this base so they always come after them.
    $stroke_base = 50000;
    // foreach replaces the each()/list() loop: each() was removed in PHP 8.
    foreach ($str_arr as $key => $value) {
        $value = urldecode($value);
        $firstChar = mb_substr($value, 0, 1, 'UTF-8');
        $stroke = get_string_stroke($firstChar);
        if ($stroke > 0) {
            $ord = $stroke_base + $stroke;
        } else {
            $ord = ord($firstChar);
        }
        $ord_arr[] = array(
            'firstChar' => $firstChar,
            'stroke' => $stroke ? $stroke : -1,
            'ord' => $ord,
            'original_index' => $key,
            'string' => $value
        );
    }
    // Skip sorting when the caller asked for input order.
    if (!$dontSort) {
        usort($ord_arr, "ucompare");
    }
    return $ord_arr;
}
/*
$string = array('驢子','史瑞克','白雪公主','長靴貓','小木偶','薑餅人','三隻小豬','睡美人','壞皇后','七個小矮人','小美人魚','神仙教母', '龜', '台', '灣', '1', '2', '3', 'a', 'b', 'c', '一', '二', '三');
$sorted = cht_strokesort($string);
foreach($sorted as $k => $v){
$index = $v['original_index'];
echo $string[$index].",";
}
*/
//http API: http://localhost/CHTSort/cht_strokesort.php?compare=%E9%A9%A2%E5%AD%90,%E5%8F%B2%E7%91%9E%E5%85%8B,%E7%99%BD%E9%9B%AA%E5%85%AC%E4%B8%BB,%E9%95%B7%E9%9D%B4%E8%B2%93,%E5%B0%8F%E6%9C%A8%E5%81%B6,%E8%96%91%E9%A4%85%E4%BA%BA,%E4%B8%80,%E4%BA%8C,%E4%B8%89,a,b,c,1,2,5,%E4%B8%83&callback=test&dont_sort=true
//
// HTTP entry point: ?compare=a,b,c[&callback=fn][&dont_sort=1]
// Responds with the records from cht_strokesort() as JSON, or as JSONP when a
// callback name is supplied.
if (isset($_GET['compare']) && is_string($_GET['compare'])) {
    // explode() replaces split(), which was removed in PHP 7.
    $arr = explode(',', $_GET['compare']);
    // Same truthiness rule as before, without a notice when the key is absent.
    $dontSort = !empty($_GET['dont_sort']);
    $sorted = cht_strokesort($arr, $dontSort);
    // To reduce the HTTP payload, the submitted strings are not echoed back.
    foreach ($sorted as $k => $v) {
        unset($sorted[$k]['string']);
    }

    $callback = (isset($_GET['callback']) && is_string($_GET['callback'])) ? $_GET['callback'] : '';

    // The callback name is written verbatim into the response, so only a
    // (possibly dotted) JavaScript identifier is accepted; anything else could
    // inject arbitrary script into the page that loads this response.
    if ($callback !== '' && !preg_match('/^[A-Za-z_$][A-Za-z0-9_$]*(\.[A-Za-z_$][A-Za-z0-9_$]*)*$/', $callback)) {
        if (!headers_sent()) {
            header('HTTP/1.1 400 Bad Request');
            header('Content-Type: text/plain; charset=utf-8');
        }
        echo "Invalid callback name";
        exit;
    }

    // An explicit content type stops browsers from treating the body as HTML.
    if (!headers_sent()) {
        header($callback !== ''
            ? 'Content-Type: application/javascript; charset=utf-8'
            : 'Content-Type: application/json; charset=utf-8');
    }

    $json = json_encode($sorted);
    if ($callback !== '') {
        echo $callback . "(" . $json . ")";
    } else {
        echo $json;
    }
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment