Skip to content

Instantly share code, notes, and snippets.

@renatocron
Created September 13, 2019 18:44
Show Gist options
  • Save renatocron/abc21af2010f31868b242a779722dd4d to your computer and use it in GitHub Desktop.
Save renatocron/abc21af2010f31868b242a779722dd4d to your computer and use it in GitHub Desktop.
# parse($text, $ret) -- normalise a free-form boolean search expression.
#
# The text is lower-cased and rewritten so that tags (runs of a-z, optionally
# prefixed with "!") are joined by explicit "&" / "|" operators and every
# parenthesis is padded with single spaces.
#
#   $text  the raw expression; undef is treated as the empty string.
#   $ret   optional hash ref; $ret->{fixed} counts how many times the
#          expression had to be flattened because of broken parentheses.
#
# Returns the normalised string, or the literal 'overflow_error' when
# $ret->{fixed} is already greater than 4 on entry.
sub parse {
    my $self = shift;
    my $text = shift;
    my $ret  = shift;

    $text = defined $text ? lc $text : '';
    $ret ||= {};
    $ret->{fixed} ||= 0;

    # Should never happen, but it stops a bug from recursing forever.
    return 'overflow_error' if $ret->{fixed} > 4;

    my $tag      = qr/!?[a-z]+/;
    my $operador = qr/[&|]/;

    # Drop groups that hold only whitespace or only operators.
    $text =~ s/\(\s+\)//g;
    $text =~ s/\((?:$operador\s*)+\)//g;
    $text = reformate_espaces($text);

    # Repeated operators: keep only the first of each run.
    $text =~ s/($operador)(?:\s?$operador)+/$1/g;

    # Tags separated by whitespace alone get an implicit AND. The middle
    # group is "([^&|]?)" rather than "([^&|])?" so that $2 is always defined
    # (possibly empty) when it is interpolated into the replacement.
    my $reg = qr/($tag+)\s+([^&|]?)($tag+)/;
    while ( $text =~ s/$reg/$1 & $2$3 / ) {
        $text = reformate_espaces($text);
    }

    # Insert an AND between a closing parenthesis and the tag after it ...
    $reg = qr/\s\)\s($tag)/;
    1 while $text =~ s/$reg/ ) & $1/;

    # ... and between a tag and the opening parenthesis after it.
    $reg = qr/($tag)\s\(/;
    1 while $text =~ s/$reg/$1 & (/;

    # Re-normalise the spacing after all the insertions above.
    $text = reformate_espaces($text);
    $text =~ s/\s*($operador)\s*/ $1 /g;

    # --------------------------------------
    # Syntax repair: remove operators dangling at the end or the start of
    # the text, or sitting right inside a parenthesis.
    $text =~ s/$operador+\s*$//;
    $text =~ s/^\s*$operador+//;
    $text =~ s/\(\s*$operador+/(/g;
    $text =~ s/$operador+\s*\)/)/g;

    my $open  = ( $text =~ tr/(// );
    my $close = ( $text =~ tr/)// );

    # Unbalanced or empty parentheses cannot be repaired structurally:
    # flatten every operator, parenthesis and negation into "|" and parse
    # the result again.
    if ( $open != $close || $text =~ /\(\s+\)/ ) {
        $ret->{fixed}++;
        $text =~ s/[&|()!]/ | /g;
        $text = $self->parse( $text, $ret );
    }

    $text = reformate_espaces($text);
    return $text;
}

# reformate_espaces($text) -- whitespace normaliser used by parse().
#
# Collapses whitespace runs to one space, trims both ends, then pads every
# parenthesis with a single space on each side. The padding runs after the
# trim, so a result that starts or ends with a parenthesis keeps one
# leading or trailing space. Returns the new string.
#
# Declared at package level: a named sub written inside another sub is
# package-scoped anyway, so nesting it only hid that fact.
sub reformate_espaces {
    my $text = shift;

    $text =~ s/\s+/ /g;              # duplicated whitespace
    $text =~ s/^\s+//;               # trim
    $text =~ s/\s+$//;
    $text =~ s/\s*\(\s*/ ( /g;       # pad around the parentheses
    $text =~ s/\s*\)\s*/ ) /g;
    $text =~ s/(\w)\)/$1 ) /g;       # separate words glued to a parenthesis
    $text =~ s/\((\w)/( $1/g;

    return $text;
}
# get_query($text) -- run parse() on $text and package the outcome.
#
# Returns a hash ref holding the normalised text under "tratado" together
# with whatever bookkeeping keys parse() stored in its state hash.
sub get_query {
    my $self = shift;
    my $text = shift;

    my %state;
    my $treated = $self->parse( $text, \%state );

    return { tratado => $treated, %state };
}
# parse_search($opt) -- turn user-typed search text into a query structure.
#
#   $opt->{text}  the raw text typed by the user.
#
# Steps: pass the text through $converter->convert and lower-case it, map the
# words for OR / AND / NOT onto "|", "&" and "!", turn square brackets into
# parentheses, strip every character outside the allowed set, replace each
# key of $self->reverse_search() with its mapped value, and keep only tokens
# that are operators, parentheses, or (optionally negated) keys of
# $self->translate().
#
# Returns the hash ref produced by $self->get_query() with two extra keys:
# "tsquery" (a copy of "tratado") and "text" (the output of
# $self->_translate_tsquery()).
#
# NOTE(review): $converter is not defined in this block; it is assumed to be
# a file-scoped object with a convert() method -- confirm against the rest of
# the file.
sub parse_search {
    my $self = shift;
    my $opt  = shift;

    my $reverse_search = $self->reverse_search();
    my $translate      = $self->translate();

    my $text = lc $converter->convert( $opt->{text} );

    $text =~ s/\b(?:ou|or)\b/ | /g;               # "ou" / "or" become |
    $text =~ s/\b(?:e|and|\+)\b/ & /g;            # "e" / "and" / "+" become &
    $text =~ tr/\[\]/()/;
    $text =~ s/\b(?:não|nao|no)(?:\s+|$)/ !/g;    # negation words become !
    $text =~ s/\bpeitos?\b/ !pussy /gi;
    $text =~ s/[^a-z\&\+\|\(\)\[\]\s\!0-9]//g;

    # A double negation cancels out.
    $text =~ s/!!//g;

    # Longest keys first so that a short key cannot pre-empt a longer one;
    # the string comparison only makes the order of equal-length keys stable.
    my @sources =
      sort { length($b) <=> length($a) || $a cmp $b } keys %$reverse_search;

    foreach my $eng (@sources) {
        my $find = lc $eng;
        $find =~ s/s$//;

        # \Q...\E: the key is literal text, not a pattern, so a key that
        # contains a regex metacharacter must not be compiled as one.
        $text =~ s/\Q$find\Es?/ $reverse_search->{$eng} /gi;
    }

    $text =~ s/!\s+/!/g;

    my @out;
    foreach my $w ( split /\s+/, $text ) {

        # Operators and parentheses (optionally negated) always pass.
        if ( $w =~ /^!?[\|&()]$/ ) {
            push @out, $w;
            next;
        }

        # A negated word passes when the word itself is a known tag.
        if ( $w =~ /^!(.+)/ && exists $translate->{$1} ) {
            push @out, $w;
            next;
        }

        push @out, $w if exists $translate->{$w};
    }

    my $ret = $self->get_query( join ' ', @out );
    $ret->{tsquery} = $ret->{tratado};
    $ret->{text}    = $self->_translate_tsquery( $ret->{tsquery} );
    return $ret;
}
# _translate_tsquery($tsquery) -- render a normalised query as readable text.
#
# Replaces " | " with " ou ", " & " with " e " and "!" with "não ", then
# swaps every whole-word occurrence of a key of $self->translate() for its
# mapped value and collapses whitespace runs. Returns the resulting string.
sub _translate_tsquery {
    my $self      = shift;
    my $tsquery   = shift;
    my $translate = $self->translate();

    $tsquery =~ s/ \| / ou /g;
    $tsquery =~ s/ & / e /g;
    $tsquery =~ s/!/não /g;

    # All keys are translated in a single pass over the text. Substituting
    # one key at a time in hash order would let a later key rewrite a
    # translation an earlier key had just inserted, and the outcome would
    # change from run to run. Keys are escaped because they are literal
    # words, not patterns.
    my @keys =
      sort { length($b) <=> length($a) || $a cmp $b } keys %$translate;
    if (@keys) {
        my $alternation = join '|', map { quotemeta } @keys;
        $tsquery =~ s/\b($alternation)\b/$translate->{$1}/g;
    }

    $tsquery =~ s/\s+/ /g;
    return $tsquery;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment