Skip to content

Instantly share code, notes, and snippets.

@risou
Created December 8, 2016 08:45
Show Gist options
  • Save risou/6b733d25cea426b14e0856be25a3e36c to your computer and use it in GitHub Desktop.
Save risou/6b733d25cea426b14e0856be25a3e36c to your computer and use it in GitHub Desktop.
List::Compare を使ったリストの差分抽出……なんでこんなに速いんだろう?
use strict;
use warnings;
use t::Utils;
use Test::More;
use List::MoreUtils qw/natatime/;
use List::Compare;
use Math::Random::MT;
use Time::Seconds;
use Time::HiRes qw/gettimeofday tv_interval/;
my $random = Math::Random::MT->new;
my $list1 = [];
my $list2 = [];
for (my $i = 1; $i <= 1000000; $i++) {
push @$list1, sprintf("%7d", $i);
push @$list2, sprintf("%7d", $i);
}
my @expect;
my $n = 1000000;
for (0..99999) {
my $i = int $random->rand($n--);
push @expect, splice @$list2, $i, 1;
}
diag "list1 num: " . scalar(@$list1);
diag "list2 num: " . scalar(@$list2);
diag "expect: ";
# diag explain @expect;
my $list3 = [];
for (@$list2) {
my $x = int ( $_ / 5000);
$x-- if $x == ($_ / 5000);
$list3->[$x] = [] unless $list3->[$x];
push @{$list3->[$x]}, $_;
}
subtest 'list matching' => sub {
my $start = [gettimeofday];
my @diff;
# my @diff = grep { !{ map { $_, 1 } @$list2 }->{$_} } @$list1;
# my $count = 0;
# my $it = natatime(5000, @$list1);
# while (my @parts = $it->()) {
# my $start_sub = [gettimeofday];
# my @parts_diff = grep { !{ map {$_, 1 } @{$list3->[$count]} }->{$_} } @parts;
# @diff = (@diff, @parts_diff);
# my $end_sub = [gettimeofday];
# diag "$count > ";
# diag explain @parts_diff;
# diag "sub time: " . tv_interval($start_sub, $end_sub);
# $count++;
# }
my $count = 0;
my $it = natatime(5000, @$list1);
while (my @parts = $it->()) {
my $start_sub = [gettimeofday];
my $lc = List::Compare->new(\@parts, $list3->[$count]);
my @parts_diff = $lc->get_Lonly;
@diff = (@diff, @parts_diff);
my $end_sub = [gettimeofday];
diag "$count";
# diag explain \@parts_diff;
diag "sub time: " . tv_interval($start_sub, $end_sub);
$count++;
}
my $end = [gettimeofday];
diag explain \@diff;
diag "time: " . tv_interval($start, $end);
is scalar(@diff), 100000;
is_deeply [sort(@expect)], [sort(@diff)];
};
done_testing;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment