Create a search snippet.
# Build a search-result snippet: an excerpt of at most $max characters of
# $text, positioned over the densest cluster of search-term hits, optionally
# with every hit wrapped in highlight tags.
#
# Parameters:
#   $text   - text to excerpt; UTF-8 bytes are decoded into characters.
#   $terms  - either a query string (split into terms on ASCII punctuation,
#             whitespace and control characters) or an array ref of literal
#             terms.  Empty/undefined terms are ignored.  The caller's array
#             is never modified.
#   $max    - maximum number of characters taken from $text.
#   $tag_s  - optional string inserted before each highlighted hit.
#   $tag_e  - optional string inserted after each highlighted hit.
#             Highlighting happens only when both tags are defined.
#
# Returns the snippet as a character string.  '... ' is prepended when the
# excerpt does not start at the beginning of $text, and ' ...' is appended
# when it does not reach the end.  Returns $text unchanged when it is
# undefined.
sub snippet {
    my ($text, $terms, $max, $tag_s, $tag_e) = @_;

    return $text unless defined $text;
    utf8::decode($text);

    # Normalise $terms into a private list of non-empty terms.  An empty
    # term must never reach the index() loop below: index() with an empty
    # needle always succeeds, so the scan would never terminate.
    my @terms;
    if (ref $terms) {
        @terms = grep { defined $_ && $_ ne '' } @{$terms};
    }
    else {
        my $query = defined $terms ? $terms : '';
        utf8::decode($query);
        @terms = grep { $_ ne '' }
            split(/[\x00-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+/, $query);
    }

    my $text_len = length $text;
    my $start    = 0;

    # Only texts longer than $max need a window; shorter ones are used whole.
    if ($text_len > $max) {

        # Collect the offset of every case-insensitive occurrence of every
        # term.  The lower-cased text is computed once, not per lookup.
        # NOTE(review): lc() can change the length of a few Unicode
        # characters, which would shift offsets slightly - confirm whether
        # such input matters to callers.
        my $haystack = lc $text;
        my @hits;
        for my $term (@terms) {
            my $needle = lc $term;
            my $pos    = index($haystack, $needle);
            while ($pos >= 0) {
                push @hits, $pos;
                $pos = index($haystack, $needle, $pos + 1);
            }
        }

        if (@hits) {
            @hits = sort { $a <=> $b } @hits;

            # Two-pointer scan: for each hit taken as the first one, find
            # the farthest hit that starts less than $max characters later.
            # Keep the pair spanning the most hits; on ties the earliest
            # pair wins because only a strictly larger span replaces it.
            # With no such pair the window is centred on the first hit.
            my ($best_first, $best_last, $best_span) = ($hits[0], $hits[0], 0);
            my $last = 0;
            for my $first (0 .. $#hits) {
                $last = $first if $last < $first;
                while ($last < $#hits
                    && $hits[ $last + 1 ] - $hits[$first] < $max)
                {
                    $last++;
                }
                if ($last - $first > $best_span) {
                    ($best_first, $best_last, $best_span)
                        = ($hits[$first], $hits[$last], $last - $first);
                }
            }

            # Centre the window on the midpoint of the chosen hits.
            my $centred = int(($best_first + $best_last) / 2 - $max / 2);
            $start = $centred if $centred > 0;
        }

        # Never let the window run past the end of the text.
        $start = $text_len - $max if $start + $max > $text_len;
    }

    my $snippet = $text_len > $max ? substr($text, $start, $max) : $text;

    # Highlight before attaching the ellipsis markers so the markers can
    # never be matched.  Terms are escaped so they match literally, and
    # longer terms come first so 'foobar' wins over its prefix 'foo'.
    if (defined $tag_s && defined $tag_e && @terms) {
        my $alternation = join '|',
            map  { quotemeta($_) }
            sort { length($b) <=> length($a) } @terms;
        $snippet =~ s/($alternation)/$tag_s$1$tag_e/ig;
    }

    $snippet = '... ' . $snippet if $start > 0;
    $snippet .= ' ...' if $start + $max < $text_len;

    return $snippet;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment