Skip to content

Instantly share code, notes, and snippets.

@niratama
Forked from ayumu83s/kirisute_gomen.pl
Last active August 29, 2015 14:01
Show Gist options
  • Save niratama/afa4ca12961c2ca232be to your computer and use it in GitHub Desktop.
Save niratama/afa4ca12961c2ca232be to your computer and use it in GitHub Desktop.
use utf8;
use strict;
use warnings;
use Encode qw(encode_utf8 decode_utf8);
# Truncate a character string so that it fits within both a character
# limit and a UTF-8 byte limit.
#
# Parameters:
#   $string   - character (decoded) string to truncate
#   $byte_len - maximum size of the result once encoded as UTF-8, in bytes
#   $str_len  - maximum length of the result, in characters
#
# Returns the longest prefix of $string that satisfies both limits.
# Only whole characters are removed, so the result never ends in a
# partial multi-byte sequence.
sub kirisute_gomen {
    my ($string, $byte_len, $str_len) = @_;

    # A negative limit can never be met, and substr() would read a negative
    # length as "leave that many characters off the end".  Treat it as zero
    # so the loop below is guaranteed to terminate.
    $byte_len = 0 if $byte_len < 0;
    $str_len  = 0 if $str_len < 0;

    # Apply the character limit first, then drop trailing characters one at
    # a time until the byte limit is satisfied as well.
    my $result = substr($string, 0, $str_len);
    my $bytes  = length(encode_utf8($result));
    while ($bytes > $byte_len) {
        # chop() removes and returns the last character; subtracting its
        # encoded size avoids re-encoding the whole string on every pass.
        $bytes -= length(encode_utf8(chop($result)));
    }
    return $result;
}
# Byte-oriented variant of the truncation: cut the encoded form directly
# instead of removing one character at a time.
#
# Parameters:
#   $string   - character (decoded) string to truncate
#   $byte_len - maximum size of the result once encoded as UTF-8, in bytes
#   $str_len  - maximum length of the result, in characters
#
# Returns the truncated string, decoded back into characters.
sub re_kirisute_gomen {
    my ($string, $byte_len, $str_len) = @_;

    # substr() reads a negative length as "leave that many off the end",
    # which would return text longer than the requested limit.  Clamp
    # negative limits to zero instead.
    $byte_len = 0 if $byte_len < 0;
    $str_len  = 0 if $str_len < 0;

    # Cut to the character limit first and convert to a byte string.
    my $byte_str = encode_utf8(substr($string, 0, $str_len));
    if (length($byte_str) > $byte_len) {
        # Too long in bytes: cut at the byte limit, which may split a
        # multi-byte character.
        $byte_str = substr($byte_str, 0, $byte_len);
        # Remove an incomplete sequence left at the very end: a lead byte
        # followed by fewer continuation bytes than it announces.
        $byte_str =~ s/(?:
            [\xc0-\xdf]|
            [\xe0-\xef][\x80-\xbf]{0,1}|
            [\xf0-\xf7][\x80-\xbf]{0,2}|
            [\xf8-\xfb][\x80-\xbf]{0,3}|
            [\xfc-\xfd][\x80-\xbf]{0,4}
        )\z//msx;
    }
    # Decode the byte string back into a character string.
    return decode_utf8($byte_str);
}
# Print a string in double quotes followed by its length in characters
# and its size in bytes when encoded as UTF-8, then a newline.
sub show_string_detail {
    my ($text) = @_;
    my $octets = encode_utf8($text);
    # Fields in the order the format string consumes them.
    my @fields = ($octets, length($text), length($octets));
    printf '"%s" %d characters, %d bytes', @fields;
    print "\n";
}
# Demo: truncate sample strings to at most 5 characters and 10 bytes.
my ($demo_max_bytes, $demo_max_chars) = (10, 5);

my @plain_samples = (
    'あいうえお',    # expected: first three characters, 9 bytes
    'あいueお',      # expected: first four characters, 8 bytes
);
my @mixed_samples = (
    'あいuえお',     # expected: first four characters, exactly 10 bytes
    'あいうДお',     # expected: first three characters, 9 bytes (split 2-byte character dropped)
    'あいueДお',     # expected: first five characters, exactly 10 bytes
);

# Character-by-character implementation on the plain samples.
for my $sample (@plain_samples) {
    show_string_detail(kirisute_gomen($sample, $demo_max_bytes, $demo_max_chars));
}
print "\n";
# Byte-oriented implementation on the plain samples, then the mixed ones.
for my $sample (@plain_samples, @mixed_samples) {
    show_string_detail(re_kirisute_gomen($sample, $demo_max_bytes, $demo_max_chars));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment