Skip to content

Instantly share code, notes, and snippets.

@ZzZombo
Last active November 19, 2017 06:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ZzZombo/e78cad56722eda7f9fe8f40b33ad6bb5 to your computer and use it in GitHub Desktop.
Save ZzZombo/e78cad56722eda7f9fe8f40b33ad6bb5 to your computer and use it in GitHub Desktop.
use NativeCall;
class Encoding::UCS2::Encoder does Encoding::Encoder
{
my Str:D $endianness=nativecast(CArray[uint8],Buf[uint16].new(0x1234))[0].base(16)==12 ?? 'BE' !! 'LE';
my Junction:D $valid-range=(0x0000..0xD7FF)|(0xE000..0xFFFF);
has Bool:D $!write-bom=Bool::True;
has Bool:D $!translate-nl=Bool::True;
has Str:D $!target-endianness='BE';
has Str:D $!replacement='�';
submethod TWEAK(:$!target-endianness where 'BE'|'LE'='BE',:$!write-bom=Bool::True,:$!replacement where {.ord ~~ $valid-range}='�',
:$!translate-nl=Bool::True)
{
}
method encode-chars(Str:D $input is copy --> Blob:D)
{
my $encoded=Buf[uint8].new;
$input.subst-mutate(/\r?\n/,"\r\n") if $!translate-nl && $*VM.osname ~~ /win/;
my @cps=$input.ords;
@cps.unshift(0xFEFF) if $!write-bom;
$encoded.append(@cps.map(
{
my $cp=$_ ~~ $valid-range ?? $_ !! $!replacement.ord;
my uint8 @cus=$cp % 256,$cp +> 8 % 256; #code units.
|($!target-endianness ne $endianness ?? @cus.reverse !! @cus);
}));
$encoded;
}
}
class Encoding::UCS2::Decoder does Encoding::Decoder
{
has Buf:D $!bytes=Buf[uint8].new;
has UInt:D $!position=0;
has Str:D @!seps;
method !decode(UInt:D $amount=$!bytes.elems --> Str:D)
{
say "DEC $!position,$amount -> {$!position+$amount}/$!bytes.elems()";
my $result=$!bytes.subbuf($!position,$amount).decode: 'utf-16';
#"<$result>".say;
$!position += $amount;
$!position min= $!bytes.elems;
$result;
}
method add-bytes(Blob:D $bytes --> Nil)
{
say "ADD $!position+$bytes.elems() ($bytes.gist()) -> {$!position+$bytes.elems}/$!bytes.elems()";
$!bytes.append: $bytes;
}
method consume-available-chars(--> Str:D)
{
"CAVC".say;
self!decode: $_ %% 2 ?? $_ !! $_ - 1 given $.bytes-available;
}
method consume-all-chars(--> Str:D)
{
"CALC".say;
$.consume-available-chars;
}
method consume-exactly-chars(int $chars, Bool:D :$eof = False --> Str)
{
"CEC $chars, $eof".say;
return Str if $.bytes-available div 2 < $chars;
self!decode: $chars*2;
}
method set-line-separators(@seps --> Nil)
{
say "SEPS: ",@seps.map(*.NFC);@!seps=@seps;
}
method consume-line-chars(Bool:D :$chomp=False,Bool:D :$eof=False --> Str)
{
"CLC $chomp, $eof".say;
my ($s,$sep);
while !$sep && !$.is-empty
{
$s ~= $.consume-exactly-chars(1);
for @!seps
{
if $s.ends-with: $_
{
$sep=$_;
last;
}
}
}
if $chomp
{
$s.subst-mutate($sep,'');
}
$s;
}
method is-empty(--> Bool:D)
{
$!bytes.elems < $!position;
}
method bytes-available(--> Int:D)
{
my $r=$!bytes.elems-$!position max 0;
"BYTES $!position/$!bytes.elems() -> $r".say;$r;
}
method consume-exactly-bytes(int $bytes --> Blob)
{
"CEB $bytes".say;
return Blob if $.bytes-available < $bytes;
my $result=$!bytes.subbuf: $!position,$bytes;
$!position += $bytes;
}
}
class Encoding::UCS2 does Encoding
{
method name(::?CLASS: --> Str:D)
{
'iso-10646-ucs-2';
}
method alternative-names(::?CLASS: --> List:D)
{
'ucs2','ucs-2';
}
method encoder(::?CLASS: |args --> Encoding::Encoder:D)
{
Encoding::UCS2::Encoder.new(|args);
}
method decoder(::?CLASS: |args --> Encoding::Decoder:D)
{
#Encoding::Registry.find('utf16').decoder(|args);
Encoding::UCS2::Decoder.new(|args);
}
}
class Encoding::UCS2BE is Encoding::UCS2
{
method name(::?CLASS: --> Str:D)
{
callsame~'-be';
}
method alternative-names(::?CLASS: --> List:D)
{
callsame.map({|($_~"-be",$_~'be')}).List;
}
method encoder(::?CLASS: |args --> Encoding::Encoder:D)
{
nextwith(|args,:target-endianness<BE>);
}
}
class Encoding::UCS2LE is Encoding::UCS2
{
method name(::?CLASS: --> Str:D)
{
callsame~'-le';
}
method alternative-names(::?CLASS: --> List:D)
{
my @a=callsame.map({|($_~"-le",$_~'le')});
}
method encoder(::?CLASS: |args --> Encoding::Encoder:D)
{
nextwith(|args,:target-endianness<LE>);
}
}
Encoding::Registry.register(Encoding::UCS2);
Encoding::Registry.register(Encoding::UCS2LE);
Encoding::Registry.register(Encoding::UCS2BE);
Encoding::Registry.find('ucs-2le').say;
multi sub MAIN(Str:D :$dir='.')
{
my $filepath=$dir.IO.add('out.txt');
say "\t$filepath.absolute()...";
my $h=$filepath.open(:mode<rw>,:enc<ucs-2le>);
my $text=$h.slurp;say +$text.comb;
$text.say;
exit 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment