Created
October 27, 2009 11:22
-
-
Save kyanny/219503 to your computer and use it in GitHub Desktop.
エンコーディングを意識して文字列を指定のバイト数でカットする
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use Benchmark; | |
use Encode; | |
use Encode::Guess; | |
# Sample input: 127 copies of a multi-byte character followed by "ab".
# When this file is saved as EUC-JP that is 127 * 2 + 2 = 256 octets.
my $var = ( q{い} x 127 ) . q{ab};

# Ask Encode::Guess to pick one of the candidate encodings.  If the call
# chain dies inside the eval, or yields a false name, fall back to euc-jp.
my $charset = eval {
    my $guess = Encode::Guess::guess_encoding( $var, qw(utf-8 euc-jp shiftjis) );
    $guess->name;
};
$charset ||= 'euc-jp';    # euc-jp is expected for the sample above

my $maxlength = 255;
my $new_var   = strcut_until_maxlength( $var, $charset, $maxlength );

# Length before the cut.
print length($var), "\n";
# => 256

# Length after the cut.
print length($new_var), "\n";
# => 255
# Cut $var down to at most $maxlength units (as counted by length(), i.e.
# octets for a byte string) without leaving a broken multi-byte character
# at the end.
#
# Parameters:
#   $var       - string to shorten, expected to hold octets in $charset
#   $charset   - encoding name understood by Encode (e.g. 'euc-jp', 'utf8')
#   $maxlength - upper bound for the result length; negative values are
#                treated as 0
#
# Returns the shortened octets, re-encoded in $charset.  Decoding stops at
# the first malformed sequence, so a character split by the cut is dropped
# rather than replaced by a substitution character (which for UTF-8 is
# three octets long and could push the result past $maxlength).  As a
# consequence, input that is already malformed before the cut point is
# truncated at the first bad sequence.
#
# Dies (from Encode) if $charset is not a known encoding.
sub strcut_until_maxlength {
    my ($var, $charset, $maxlength) = @_;

    # A negative limit can never be satisfied; clamp it so the result is the
    # empty string instead of looping forever on an already-empty value.
    $maxlength = 0 if $maxlength < 0;

    # One substr() call instead of chopping a character at a time.
    $var = substr($var, 0, $maxlength) if length($var) > $maxlength;

    # FB_QUIET makes decode() return only the cleanly decoded prefix and
    # leave the unprocessed tail in $var (our private copy), which is then
    # simply discarded.
    my $decoded = Encode::decode($charset, $var, Encode::FB_QUIET);

    return Encode::encode($charset, $decoded);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment