Skip to content

Instantly share code, notes, and snippets.

Created September 5, 2012 19:10
Show Gist options
  • Save anonymous/3642781 to your computer and use it in GitHub Desktop.
Save anonymous/3642781 to your computer and use it in GitHub Desktop.
KinoSearch Example
package MyApp::Web::Controller::Search;
use 5.010;
use Moose;
use namespace::autoclean;

BEGIN { extends 'Catalyst::Controller'; }

# Search page action.
#
# Takes the 'q' request parameter (an undefined value is replaced by the
# empty string), hands it to the 'Search' model and stashes the returned
# result structure under 'hits' for the 'main/search.mc' template.
sub index : Path : Args(0) {
    my ( $self, $c ) = @_;

    my $query   = $c->req->param('q') // '';
    my $results = $c->model('Search')->search($query);

    $c->log->debug("Search matched $results->{total_hits} items");

    $c->stash(
        hits       => $results,
        template   => 'main/search.mc',
        meta_title => 'Search',
    );
}

__PACKAGE__->meta->make_immutable;
1;
#!/usr/bin/env perl
# ABSTRACT: Document indexer for search engine
use v5.10;
use strict;
use warnings;
use FindBin::libs;
use Config::JFDI;
use KSx::Simple;
use HTML::Strip;
use Path::Class;
use MyApp::Schema::CMS;
# Rebuild the search index from scratch: every visible CMS page is reduced
# to plain text (title and content) and added to a fresh KSx::Simple index.
my $config = Config::JFDI->new( name => 'myapp_web' )->get;

# Get CMS schema info from config
my $schema =
    MyApp::Schema::CMS->connect( $config->{'Model::CMSdb'}->{connect_info} );
my $pages = $schema->resultset('Page');

# Get indexer info from the config
my $path_to_index = dir( $config->{'Model::Search'}->{index} );
say "Index is: $path_to_index";

# Start from an empty directory: drop any existing index, then recreate it.
$path_to_index->rmtree if -e $path_to_index;
$path_to_index->mkpath;

# Create the analyzer and the Inverted Indexer
my $index = KSx::Simple->new(
    path     => $path_to_index,
    language => 'en',
);

# Populate the Inverted Index from all visible pages in the CMS
my $stripper = HTML::Strip->new(
    decode_entities => 1,
    emit_spaces     => 1,
    striptags       => [qw(title style script applet sup form)],
);

# Convert one HTML fragment to trimmed plain text.
#
# HTML::Strip keeps its parser state between parse() calls, so a fragment
# that ends inside a tag (or inside one of the stripped elements) would
# swallow text from the next fragment.  eof() resets the parser, which makes
# every title/content fragment an independent document.
my $html_to_text = sub {
    my ($html) = @_;
    my $text = $stripper->parse($html);
    $stripper->eof;
    return trim($text);
};

my $pages_rs = $pages->search( { visible => 1 } );
while ( my $page = $pages_rs->next ) {

    # Strip out any tags from the live title and content sections
    my $title    = $html_to_text->( $page->section('title')->live->html );
    my $contents = $html_to_text->( $page->section('content')->live->html );
    my $url      = "/page/" . $page->url;

    # Report what we are doing: URL, title, and the first 70 characters of
    # the content with each run of whitespace collapsed to a single space.
    ( my $preview = substr( $contents, 0, 70 ) ) =~ s/\s+/ /g;
    say "Add: $url";
    say " => $title";
    say " -> ", $preview, "...";

    $index->add_doc(
        {
            title   => $title,
            url     => $url,
            content => $contents,
        }
    );
}
# Return a copy of the given string with leading and trailing whitespace
# removed; whitespace inside the string is left untouched.
sub trim {
    my ($text) = @_;
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    return $text;
}
package MyApp::Web::Model::Search;
use Moose;
use namespace::autoclean;
extends 'Catalyst::Model';

use KSx::Simple;
use KinoSearch::Highlight::Highlighter;

# Location of the search index, as a string.
has 'index' => (
    is  => 'ro',
    isa => 'Str',
);

# KSx::Simple handle for the index; built on first access from the 'index'
# attribute, always with language 'en'.
has '_index' => (
    is      => 'ro',
    isa     => 'KSx::Simple',
    lazy    => 1,
    default => sub {
        my ($model) = @_;
        my %simple_args = (
            path     => $model->index,
            language => 'en',
        );
        return KSx::Simple->new(%simple_args);
    },
);

# search( $q, $offset, $hits_per_page )
#
# Runs the query string $q against the index and returns a hashref:
#
#   {
#       total_hits => <value returned by the KSx::Simple search call>,
#       hits       => [ { excerpt => ..., url => ..., score => ..., title => ... }, ... ],
#   }
#
# $offset defaults to 0 and $hits_per_page to 100 when undefined.  The
# excerpt of each hit is produced by a highlighter on the 'content' field.
sub search {
    my ( $self, $q, $offset, $hits_per_page ) = @_;

    $offset        = 0   unless defined $offset;
    $hits_per_page = 100 unless defined $hits_per_page;  # TODO: get from config

    # TODO: implement paging of search results
    my $simple = $self->_index;

    my $match_count = $simple->search(
        query      => $q,
        offset     => $offset,
        num_wanted => $hits_per_page,
    );

    # The highlighter is created only after the search call because it reads
    # the undocumented 'searcher' slot of the KSx::Simple object --
    # presumably that slot is filled in by search(); verify against KSx::Simple.
    my $highlighter = KinoSearch::Highlight::Highlighter->new(
        searcher => $simple->{searcher},    # Non-documented
        query    => $q,
        field    => 'content',
    );

    # Drain the result iterator into plain hashrefs for the view.
    my @found;
    while ( my $doc = $simple->next ) {
        my %row = (
            excerpt => $highlighter->create_excerpt($doc),
            url     => $doc->{url},
            score   => $doc->get_score,
            title   => $doc->{title},
        );
        push @found, \%row;
    }

    return {
        total_hits => $match_count,
        hits       => \@found,
    };
}

__PACKAGE__->meta->make_immutable;
1;
#!/usr/bin/env perl
# ABSTRACT: Command line search test script
use v5.10;
use strict;
use warnings;
use Config::JFDI;
use KSx::Simple;
use KinoSearch::Highlight::Highlighter;
use Path::Class;
use Data::Dump;
# Command-line smoke test: run a fixed query against the configured index
# and print the hit counts plus title, URL and excerpt of each hit.

# Resolve the index location from the 'myapp_web' configuration.
my $settings   = Config::JFDI->new( name => 'myapp_web' )->get;
my $index_path = $settings->{'Model::Search'}->{index};
say "Index is: $index_path";

# Make sure the directory containing the index path exists.
file($index_path)->dir->mkpath;

# Create the analyzer and the Inverted Indexer
my $simple = KSx::Simple->new(
    language => 'en',
    path     => $index_path,
);

# Fixed query; only the first 10 hits are requested.
my $query_string = 'the';
my $hit_count    = $simple->search(
    num_wanted => 10,
    offset     => 0,
    query      => $query_string,
);

# Dump the KSx::Simple object for inspection.
dd($simple);

# The highlighter reads the 'searcher' slot of the KSx::Simple object
# directly, so it is built after the search call above.
my $highlighter = KinoSearch::Highlight::Highlighter->new(
    field    => 'content',
    query    => $query_string,
    searcher => $simple->{searcher},
);

# The count is printed twice: once as returned by search(), once as read
# from the 'hits' slot of the KSx::Simple object.
say "Total hits: $hit_count";
say "Total hits: " . $simple->{hits}->total_hits;

while ( my $doc = $simple->next ) {
    my $snippet = $highlighter->create_excerpt($doc);
    say "Title: $doc->{title}";
    say "URL: $doc->{url}";
    say "Excerpt: $snippet";
}
%# Search results page.
%# Expects $hits: a hashref with 'total_hits' (compared numerically below)
%# and 'hits', an arrayref of hashrefs carrying url, title, score and excerpt.
<%args>
$hits
</%args>
<h2>Search Results</h2>
% if( $hits->{total_hits} > 0 ) {
<ul>
% for my $hit ( @{$hits->{hits} }){
<li>
<a href="<% $hit->{url} %>"> <% $hit->{title} %> </a>
%# The score is multiplied by 100 and shown with two decimal places.
(score: <% sprintf "%0.2f", 100 * $hit->{score} %>) <br/>
%# The excerpt is emitted with the 'n' escape flag, unlike url and title.
%# NOTE(review): presumably the excerpt already carries highlighter markup
%# and is HTML-safe -- confirm against the code that produces it.
<% $hit->{excerpt} |n %> <br/>
%# <span class="excerptURL"><% $hit->{url} %></span>
</li>
% }
</ul>
% } else {
%# NOTE(review): the message announces a list of pages, but nothing follows
%# it in this template.
<p>Sorry, no results were found for that search. One of the following
pages may be a good starting point:</p>
% }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment