Skip to content

Instantly share code, notes, and snippets.

@Meettya
Created December 30, 2011 14:47
Show Gist options
  • Save Meettya/1540175 to your computer and use it in GitHub Desktop.
Save Meettya/1540175 to your computer and use it in GitHub Desktop.
test indexing compare compile regex and others features
use IO::File;
use File::Slurp;
#use re 'debug';
#use Smart::Comments;
use English qw(-no_match_vars);
use Data::Dumper 'Dumper';
use Benchmark qw/cmpthese timethese/;
my $cnt = 30;
my $testfile = 'test.xml';
make_test_xml( $cnt, $testfile );
my $testfile2 = 'test2.xml';
make_test_xml( $cnt, $testfile2 );
my $r = timethese(
-3,
{
test => sub { my $ko = invoke( $testfile, 1, 0 ) },
test2 => sub { my $ko = invoke( $testfile2, 2, 0 ) }
}
);
cmpthese $r;
sub invoke {
my $filename = shift;
my $type = shift;
my $verbose = shift;
my $idx_file = $filename . q{.idx};
_clear_file($idx_file);
my $cnt_of_idx;
my $fh = IO::File->new( $filename, 'r' )
or die "Couldn't open $filename for read and write: $ERRNO\n";
if ( $type = 1 ) {
$cnt_of_idx = _generate_index( $fh, $idx_file );
}
else {
$cnt_of_idx = _generate_index2( $fh, $idx_file );
}
$fh->close();
print
"index with $index_func for file $filename with $cnt_of_idx row created ok\n"
if $verbose;
return 1;
}
sub _clear_file {
my $file = shift;
if ( -f $file ) {
unlink($file);
}
return 1;
}
sub _generate_index {
my ( $fh, $idx_file ) = @_;
my ( $tag_value, @index_table, $index_row, $end );
my $start_tag = '<?xml version="1.0" encoding="UTF-8"?>';
my $tag = 'Account';
my $tag_string = "\<$tag\>([A-Za-z0-9]+)\<\/$tag\>";
my $RGX_TAG = qr/$tag_string/ims;
my $RGX_START = qr/\Q$start_tag\E/ims;
my $count_xml = 1;
my $start = 1;
while ( my $line = <$fh> ) {
if ( $line =~ /$RGX_TAG/ ) {
$tag_value = $1;
}
if ( $line =~ /$RGX_START/ ) {
$end = $INPUT_LINE_NUMBER - 1;
$index_row = join ',', $start, $end, $tag_value;
if ( $count_xml > 1 ) {
push @index_table, $index_row . "\n";
}
$start = $INPUT_LINE_NUMBER;
$count_xml++;
}
}
$index_row = join ',', $start, $INPUT_LINE_NUMBER, $tag_value;
push @index_table, $index_row;
write_file( $idx_file, \@index_table );
return scalar @index_table;
}
sub _generate_index2 {
my ( $fh, $idx_file ) = @_;
my ( $tag_value, @index_table, $index_row, $end );
my $start_tag = '<?xml version="1.0" encoding="UTF-8"?>';
my $tag = 'Account';
my $RGX_START = qr/\Q$start_tag\E/ims;
my $tag_string = "<$tag>(\w+)</$tag>";
my $RGX_TAG = qr/$tag_string/ims;
my $count_xml = 1;
my $start = 1;
while ( my $line = <$fh> ) {
if ( $line =~ /\<Account\>(\w+)\<\/Account\>/ ) {
$tag_value = $1;
}
if ( $line =~ /\Q$start_tag\E/ims ) {
$end = $INPUT_LINE_NUMBER - 1;
$index_row = join ',', $start, $end, $tag_value;
if ( $count_xml > 1 ) {
push @index_table, $index_row . "\n";
}
$start = $INPUT_LINE_NUMBER;
$count_xml++;
}
}
$index_row = join ',', $start, $INPUT_LINE_NUMBER, $tag_value;
push @index_table, $index_row;
write_file( $idx_file, \@index_table );
return scalar @index_table;
}
sub make_test_xml {
my $max = shift;
my $file = shift;
my @data = ();
my $chunk;
for my $Account_id ( 1 .. $max ) {
$chunk = <<"END_XML";
<?xml version="1.0" encoding="UTF-8"?>
<RECORD>
<Account>${Account_id}2550004455</Account>
<message-size xmlns="">01</message-size>
<message-size1 xmlns="">04</message-size>
<message-size2 xmlns="">05</message-size>
<message-size3 xmlns="">05</message-size>
</RECORD>
END_XML
push @data, $chunk;
}
write_file( $file, \@data );
return 1;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment