Skip to content

Instantly share code, notes, and snippets.

Created August 21, 2009 16:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/172169 to your computer and use it in GitHub Desktop.
Save anonymous/172169 to your computer and use it in GitHub Desktop.
Index: t/Search/Index.t
===================================================================
--- t/Search/Index.t (revision 12178)
+++ t/Search/Index.t (working copy)
@@ -33,14 +33,14 @@
title => 'title',
menuTitle => 'menuTitle',
} );
-WebGUI::Test->tagsToRollback(
- WebGUI::VersionTag->getWorking( $session ),
-);
+#WebGUI::Test->tagsToRollback(
+# WebGUI::VersionTag->getWorking( $session ),
+#);
#----------------------------------------------------------------------------
# Tests
-plan tests => 16; # Increment this number for each test you create
+plan tests => 15; # Increment this number for each test you create
use_ok( 'WebGUI::Search::Index' );
@@ -123,7 +123,7 @@
} );
$indexer = WebGUI::Search::Index->create( $article );
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
+ok ( $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
"assetId exists in assetIndex"
);
cmp_deeply (
@@ -149,7 +149,7 @@
),
lineage => $article->get('lineage'),
},
- "Index has correct information"
+ "Index has synopsis information in keywords"
);
@@ -161,9 +161,7 @@
});
$indexer = WebGUI::Search::Index->create( $article );
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
- "assetId exists in assetIndex"
-);
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ]);
cmp_deeply (
$row,
{
@@ -187,7 +185,7 @@
),
lineage => $article->get('lineage'),
},
- "Index has correct information"
+ "Index has description in keywords"
);
@@ -199,9 +197,7 @@
});
$indexer = WebGUI::Search::Index->create( $article );
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ),
- "assetId exists in assetIndex"
-);
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
cmp_deeply (
$row,
{
@@ -224,7 +220,35 @@
),
lineage => $article->get('lineage'),
},
- "Index has correct information"
+ "Index has synopsis and description in keywords"
);
+#----------------------------------------------------------------------------
+# Test that HTML entities are decoded.
+$article->update({
+ description => "schön & cañón",
+});
+$indexer = WebGUI::Search::Index->create( $article );
+
+my $pretty = "schön";
+my $canyon = "cañón";
+
+utf8::upgrade($pretty);
+utf8::upgrade($canyon);
+
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] );
+my $keywords = $row->{keywords};
+utf8::upgrade($keywords);
+cmp_deeply (
+ $row,
+ superhashof({
+ keywords => all( # keywords must match both decoded words and a literal ampersand
+ re($pretty),
+ re("&"),
+ re($canyon),
+ ),
+ }),
+ "Index has decoded entities"
+);
+
#vim:ft=perl
Index: lib/WebGUI/Search/Index.pm
===================================================================
--- lib/WebGUI/Search/Index.pm (revision 12178)
+++ lib/WebGUI/Search/Index.pm (working copy)
@@ -15,6 +15,7 @@
=cut
use strict;
+use HTML::Entities;
=head1 NAME
@@ -82,7 +83,6 @@
my $self = shift;
my $text = join(" ", @_);
- $text = WebGUI::HTML::filter($text, "all");
$text = $self->_filterKeywords($text);
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]);
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]);
@@ -166,7 +166,8 @@
=head2 _filterKeywords ( $keywords )
-Perform filtering and cleaning up of the keywords before submitting them.
+Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded
+so that they are still searchable. HTML entities are decoded.
=head3 $keywords
@@ -179,6 +180,8 @@
my $keywords = shift;
$keywords = WebGUI::HTML::filter($keywords, "all");
+ $keywords = HTML::Entities::decode_entities($keywords);
+ utf8::upgrade($keywords);
# split into 'words'. Ideographic characters (such as Chinese) are
# treated as distinct words. Everything else is space delimited.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment