Skip to content

Instantly share code, notes, and snippets.

@ceekz
Last active Dec 18, 2015
Embed
What would you like to do?
Create a search snippet.
# snippet($text, $terms, $max, $tag_s, $tag_e)
#
# Cut a search-result excerpt of $max characters out of $text, positioned so
# that the window covers as many search-term hits as possible.
#
# Parameters:
#   $text   - text to excerpt. A private copy is passed through utf8::decode,
#             so all offsets and lengths below are counted on that copy.
#   $terms  - either a plain string, which is split into terms on ASCII
#             control/whitespace/punctuation characters, or an array
#             reference of terms used as given (not decoded, not split).
#             Undefined and empty terms are ignored.
#   $max    - window length in characters.
#   $tag_s, $tag_e
#           - optional strings placed before/after every term occurrence in
#             the excerpt. Highlighting happens only when both are defined.
#
# Returns:
#   * the (decoded) text itself, untouched and without highlighting, when it
#     is shorter than $max;
#   * otherwise the excerpt, prefixed with '... ' when it does not start at
#     the beginning of the text and suffixed with ' ...' when it does not
#     reach the end.
#
# Terms are matched case-insensitively and literally: regex metacharacters
# inside a term have no special meaning.
sub snippet {
    my ($text, $terms, $max, $tag_s, $tag_e) = @_;

    utf8::decode($text);
    if (length($text) < $max) {
        return $text;
    }

    # Normalise $terms into a list of non-empty strings.  Empty terms must be
    # dropped: index() with an empty needle always succeeds, which would keep
    # the hit-collection loop below running forever.
    my @terms;
    if (ref($terms)) {
        @terms = @{$terms};
    }
    elsif (defined $terms) {
        utf8::decode($terms);
        @terms = split(/[\x00-\x2F\x3A-\x40\x5B-\x60\x7B-\x7F]+/, $terms);
    }
    @terms = grep { defined $_ && $_ ne '' } @terms;

    # Collect the offset of every (case-insensitive) occurrence of every term.
    my $haystack = lc $text;    # lower-cased once, not once per index() call
    my @idxes;
    foreach my $term (@terms) {
        my $needle = lc $term;
        my $offset = index($haystack, $needle);
        while ($offset >= 0) {
            push @idxes, $offset;
            $offset = index($haystack, $needle, $offset + 1);
        }
    }

    my $idx_begin = 0;
    if (@idxes) {
        @idxes = sort { $a <=> $b } @idxes;

        # Find the pair of hits less than $max apart that has the most hits
        # between them; ties go to the earliest pair.  Because @idxes is
        # sorted, the furthest reachable hit $j never moves backwards as $i
        # advances, so a single forward sweep is enough.  With no such pair
        # the window is centred on the first hit.
        my ($best_lo, $best_hi, $best_span) = ($idxes[0], $idxes[0], 0);
        my $j = 0;
        for my $i (0 .. $#idxes - 1) {
            $j = $i if $j < $i;
            while ($j < $#idxes && $idxes[$j + 1] - $idxes[$i] < $max) {
                $j++;
            }
            if ($j - $i > $best_span) {
                ($best_lo, $best_hi, $best_span) = ($idxes[$i], $idxes[$j], $j - $i);
            }
        }

        # Centre the window on the midpoint of the chosen hits, clamped to
        # the start of the text.
        my $start = int(($best_lo + $best_hi) / 2 - $max / 2);
        $idx_begin = ($start > 0) ? $start : 0;
    }

    # Clamp to the end of the text (length >= $max is guaranteed by the early
    # return above, so this never goes negative).
    my $text_len = length($text);
    if ($idx_begin + $max > $text_len) {
        $idx_begin = $text_len - $max;
    }

    my $snippet = substr($text, $idx_begin, $max);

    # Highlight before the ellipsis markers are attached so a term can never
    # match inside them.  Terms are escaped with quotemeta and tried longest
    # first so that e.g. "foobar" is not cut short by a shorter term "foo".
    if (defined $tag_s && defined $tag_e && @terms) {
        my $re = join('|',
            map  { quotemeta($_) }
            sort { length($b) <=> length($a) } @terms);
        $snippet =~ s/($re)/$tag_s$1$tag_e/ig;
    }

    if ($idx_begin > 0) {
        $snippet = '... ' . $snippet;
    }
    if ($idx_begin + $max < $text_len) {
        $snippet = $snippet . ' ...';
    }
    return $snippet;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment