Skip to content

Instantly share code, notes, and snippets.

@hanabokuro
Created September 2, 2013 09:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hanabokuro/6410792 to your computer and use it in GitHub Desktop.
Save hanabokuro/6410792 to your computer and use it in GitHub Desktop.
use strict;
use warnings;
use utf8;
use Test::More;
use Test::Builder;
# Load the module under test at compile time. use_ok() is itself a test
# function, so the load is recorded as a test result of this file.
BEGIN {
use_ok('AvoidCoreTextBug');
}
# Test::Builder handle used by the is_filterd() / is_no_filterd() helpers
# defined near the end of this file.
my $Tester = Test::Builder->new;
# Inputs that filter() is expected to change when called without a second
# argument. Every call goes through is_filterd(), which asserts that the
# result is the input with "\x{200e}" appended.
# Within each group of three the leading character is written as a "\x{600}"
# escape, as an "&#x600" reference, and as a literal non-ASCII character
# (presumably U+0600 -- confirm).
# NOTE(review): several literals below look identical as rendered here (for
# example the repeated "\x{600} " lines). Their trailing separators may be
# different whitespace code points or entities that were decoded on the way
# here -- confirm against the original bytes before editing them.
subtest simple => sub {
is_filterd("\x{600}\x{20}");
is_filterd("&#x600\x{20}");
is_filterd("؀\x{20}");
is_filterd("\x{600} ");
is_filterd("&#x600 ");
is_filterd("؀ ");
is_filterd("\x{600} ");
is_filterd("&#x600 ");
is_filterd("؀ ");
is_filterd("\x{600} ");
is_filterd("&#x600&#x20");
is_filterd("؀&#32");
};
# Same inputs as the 'simple' subtest, but with 1 passed as the second
# argument; the helpers forward it to filter() as their
# $dont_convert_entity_reference parameter.
# is_filterd() expects "\x{200e}" to be appended, is_no_filterd() expects the
# input to come back unchanged.
# NOTE(review): as rendered here some calls look contradictory (for example
# is_filterd("\x{600} ", 1) and is_no_filterd("\x{600} ", 1) appear to pass
# the same string). The separators are presumably different code points or
# entities that were decoded on the way here -- confirm against the original
# bytes before editing them.
subtest 'simple no entity reference' => sub {
is_filterd("\x{600}\x{20}", 1);
is_no_filterd("&#x600\x{20}", 1);
is_no_filterd("؀\x{20}", 1);
is_filterd("\x{600} ", 1);
is_no_filterd("&#x600 ", 1);
is_no_filterd("؀ ", 1);
is_no_filterd("\x{600} ", 1);
is_no_filterd("&#x600 ", 1);
is_no_filterd("؀ ", 1);
is_no_filterd("\x{600} ", 1);
is_no_filterd("&#x600&#x20", 1);
is_no_filterd("؀&#32", 1);
};
# A multi-line input containing two "\x{600}\x{20}" pairs. The expected
# output has "\x{200e}" inserted directly after each pair and is otherwise
# identical to the input, newlines included.
subtest 'multi line' => sub {
    my $expected
        = "\na\nb\x{600}\x{20}\x{200e}a\nb\nc\n\x{600}\x{20}\x{200e}\n\n\n";

    my $got = AvoidCoreTextBug::filter(
        "\na\nb\x{600}\x{20}a\nb\nc\n\x{600}\x{20}\n\n\n"
    );

    is($got, $expected);
};
# Plain ASCII text is expected to come back from filter() unchanged.
subtest 'ascii' => sub {
    is_no_filterd('abc');
};
# Text containing markup tags (with and without surrounding spaces, and with
# a trailing "&nbsp;") is expected to come back from filter() unchanged.
subtest 'with_tag' => sub {
    is_no_filterd('foo<a>bar');
    is_no_filterd('foo <a> bar </a> baz');
    is_no_filterd('foo <a> &nbsp;');
};
# Decimal and hexadecimal numeric references, with and without the closing
# ';' and with or without text after them, are expected to come back from
# filter() unchanged.
subtest 'with_entity' => sub {
    is_no_filterd('a&#98;');
    is_no_filterd('a&#98;&#x63');
    is_no_filterd('a&#98;&#x63;');
    is_no_filterd('a&#98;&#x63;d');
};
# References to code points 0x3C and 0x3E in several spellings: named,
# hexadecimal, decimal, upper-case "X", zero-padded, and without the closing
# ';'. All are expected to come back from filter() unchanged.
subtest 'with 0x00-0xff entity' => sub {
    # Named, hexadecimal and decimal forms between plain letters.
    is_no_filterd('a&lt;&gt;b');
    is_no_filterd('a&#x3c;&#x3e;b');
    is_no_filterd('a&#60;&#62;b');

    # Zero-padded forms, including an unterminated final reference.
    is_no_filterd('&#X0000003C;&#x00003E');
    is_no_filterd('&#000000060;&#0000062');
    is_no_filterd('&#000000060');
};
# Numeric references to code points above 0xFF that the subtest title
# describes as "not Arabic" (0x3042, 0x3044, 0x3046, 12360, 12362), alone and
# interleaved with ASCII letters and ASCII references. All are expected to
# come back from filter() unchanged.
subtest 'with 0x100- entity (not Arabic)' => sub {
    is_no_filterd('&#x3042;&#x3044&#x0003046;&#12360;&#00012362;');
    is_no_filterd('a&#x3042;i&#x3044u&#x0003046;e&#12360;o&#00012362;');
    is_no_filterd(
        '&#97;&#x3042;&#x62;&#x3044u&#x0003046;e&#12360;o&#00012362;'
    );
    is_no_filterd('a&#x3042;b&#x62;');
};
# Hexadecimal references checked in pairs: the reference on its own is
# expected to come back unchanged (is_no_filterd), and the same reference
# followed by a separator is expected to come back with "\x{200e}" appended
# (is_filterd).
subtest 'with_Arabic' => sub {
    is_no_filterd('&#x600;');
    is_filterd("&#x600;\t");

    is_no_filterd('&#x750;');
    is_filterd('&#x750&nbsp');

    is_no_filterd('&#x8a0');
    is_filterd('&#x8a0 ');

    is_no_filterd('&#xfb50;');
    is_filterd("&#xfb50;\n");

    # NOTE(review): 0x7e70 may be a typo for 0xfe70 -- confirm against the
    # ranges AvoidCoreTextBug actually handles.
    is_no_filterd('&#x7e70;');
    is_filterd("&#x7e70;\r");

    is_no_filterd('&#x10e60;');
    is_filterd("&#x10e60;\x{07}");

    is_no_filterd('&#x1ee00;');
    is_filterd('&#x1ee00; ');

    # NOTE(review): "&#no_x200f;" does not follow the "&#x...;" form used by
    # the other bare checks in this subtest -- confirm it is intentional.
    is_no_filterd('&#no_x200f;');
    is_filterd('&#x200f; ');

    is_no_filterd('&#x202b;');
    is_filterd('&#x202b; ');

    is_no_filterd('&#x202e;');
    is_filterd('&#x202e; ');
};
# Two references (0x590 and 0xFB1D), each checked on its own (expected
# unchanged) and followed by a separator (expected with "\x{200e}" appended).
subtest 'with_Hebrew' => sub {
    is_no_filterd('&#x590;');
    is_filterd('&#x590;&nbsp;');

    is_no_filterd('&#xFB1D;');
    is_filterd("&#xFB1D;\t");
};
# A reference to 0x700 followed by a space is expected to come back with
# "\x{200e}" appended.
subtest 'with_Syriac' => sub {
    is_filterd('&#x700; ');
};
# Exhaustive check over every code point in the ranges listed below, written
# as a decimal numeric reference without the closing ';' (e.g. "&#1536"):
#   * the bare reference must come back from filter() unchanged;
#   * the reference followed by a space must come back with "\x{200e}"
#     appended.
# is() is only called for mismatches, so a passing run reports nothing per
# code point; the final ok(1) gives the subtest a result in that case.
subtest with_all_right_to_left_char => sub {
    foreach my $code (
        0x600   .. 0x6ff,
        0x750   .. 0x77f,
        0x8a0   .. 0x8ff,
        0xfb50  .. 0xfdff,
        # NOTE(review): this range overlaps 0xfb50 .. 0xfdff above and
        # 0xfb1d .. 0xfb4f below; 0x7e70 may be a typo for 0xfe70 -- confirm
        # against the ranges AvoidCoreTextBug actually handles.
        0x7e70  .. 0xfeff,
        0x10e60 .. 0x10e7f,
        0x1ee00 .. 0x1eeff,
        0x590   .. 0x05FF,
        0xfb1d  .. 0xfb4f,
        0x700   .. 0x74f,
    ) {
        my $str = "&#$code";

        # Bare reference: expected unchanged. is() takes (got, expected), so
        # the filter output goes first to keep the failure diagnostics right.
        my $filterd = AvoidCoreTextBug::filter($str);
        if ($filterd ne $str) {
            is($filterd, $str, $code);
        }

        # Reference + space: compare against $expected (not $str), so the
        # reported "expected" value is the one that was actually checked and
        # this branch can never record a pass.
        my $expected = $str . " \x{200e}";
        $filterd = AvoidCoreTextBug::filter($str . " ");
        if ($filterd ne $expected) {
            is($filterd, $expected, "$code + space");
        }
    }
    ok(1);
};
# Check that filter() appends "\x{200e}" when U+0600 is followed by each of
# the separator code points listed below.
subtest separator => sub {
    is_filterd("&#x600;\x{2003}");

    # Exercise every listed code point; each gets its own named result so a
    # failure identifies the separator that was not handled.
    # NOTE(review): U+180E was moved out of the Space_Separator category in
    # Unicode 6.3; if AvoidCoreTextBug matches separators by Unicode
    # property, that code point may fail on newer perls -- confirm.
    foreach my $code (
        0x20,   0xa0,
        0x1680, 0x180e,
        0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
        0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
        0x202f, 0x205f,
        0x3000,
    ) {
        my $original = "\x{600}" . chr($code);
        my $filterd  = AvoidCoreTextBug::filter($original);
        is(
            $filterd,
            $original . "\x{200e}",
            sprintf('U+0600 followed by U+%04X', $code),
        );
    }
};
# Mixed input: plain letters, a '<', named and numeric references (with and
# without the closing ';') and literal Japanese text in one string.
subtest complex => sub {
# This combination is expected to come back from filter() unchanged.
is_no_filterd("a&#x600;b<&lt;&#1234あいう&rlm;&#xfb50か&#x200fきく");
# Variant with a tab, a space, "&nbsp" and a newline placed after four of the
# references; the expected string has "\x{200e}" directly after each of them.
is(AvoidCoreTextBug::filter("a&#x600\tb<&lt;&#1234あいう&rlm; &#xfb50&nbspか&#x200f\nきく"), "a&#x600\t\x{200e}b<&lt;&#1234あいう&rlm; \x{200e}&#xfb50&nbsp\x{200e}か&#x200f\nきく");
};
# Regression cases (the subtest title is 'bug fix').
subtest 'bug fix' => sub {
    # "}" followed by "&nbsp;" is expected to come back unchanged.
    is_no_filterd('}&nbsp;');

    # U+0600 followed by a soft hyphen, written as a named reference, as a
    # decimal reference and as a literal "\x{ad}", is expected to come back
    # with "\x{200e}" appended.
    is_filterd("\x{600}&shy;");
    is_filterd("\x{600}&#173;");
    is_filterd("\x{600}\x{ad}");
};
# Assert that AvoidCoreTextBug::filter() returns its input with a single
# "\x{200e}" appended.
#
# Arguments:
#   1. the input string handed to filter();
#   2. optional flag, forwarded as-is as filter()'s second argument.
#
# Returns the result of Test::Builder's is_eq().
sub is_filterd {
    my ($input, $keep_entity_references) = @_;

    my $got = AvoidCoreTextBug::filter($input, $keep_entity_references);

    return $Tester->is_eq($got, $input . "\x{200e}");
}
# Assert that AvoidCoreTextBug::filter() returns its input unchanged.
#
# Arguments:
#   1. the input string handed to filter();
#   2. optional flag, forwarded as-is as filter()'s second argument.
#
# Returns the result of Test::Builder's is_eq().
sub is_no_filterd {
    my ($input, $keep_entity_references) = @_;

    my $got = AvoidCoreTextBug::filter($input, $keep_entity_references);

    return $Tester->is_eq($got, $input);
}
done_testing;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment