Created
August 21, 2009 16:34
-
-
Save anonymous/172169 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Index: t/Search/Index.t | |
=================================================================== | |
--- t/Search/Index.t (revision 12178) | |
+++ t/Search/Index.t (working copy) | |
@@ -33,14 +33,14 @@ | |
title => 'title', | |
menuTitle => 'menuTitle', | |
} ); | |
-WebGUI::Test->tagsToRollback( | |
- WebGUI::VersionTag->getWorking( $session ), | |
-); | |
+#WebGUI::Test->tagsToRollback( | |
+# WebGUI::VersionTag->getWorking( $session ), | |
+#); | |
#---------------------------------------------------------------------------- | |
# Tests | |
-plan tests => 16; # Increment this number for each test you create | |
+plan tests => 15; # Increment this number for each test you create | |
use_ok( 'WebGUI::Search::Index' ); | |
@@ -123,7 +123,7 @@ | |
} ); | |
$indexer = WebGUI::Search::Index->create( $article ); | |
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ), | |
+ok ( $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ), | |
"assetId exists in assetIndex" | |
); | |
cmp_deeply ( | |
@@ -149,7 +149,7 @@ | |
), | |
lineage => $article->get('lineage'), | |
}, | |
- "Index has correct information" | |
+ "Index has synopsis information in keywords" | |
); | |
@@ -161,9 +161,7 @@ | |
}); | |
$indexer = WebGUI::Search::Index->create( $article ); | |
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ), | |
- "assetId exists in assetIndex" | |
-); | |
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ]); | |
cmp_deeply ( | |
$row, | |
{ | |
@@ -187,7 +185,7 @@ | |
), | |
lineage => $article->get('lineage'), | |
}, | |
- "Index has correct information" | |
+ "Index has description in keywords" | |
); | |
@@ -199,9 +197,7 @@ | |
}); | |
$indexer = WebGUI::Search::Index->create( $article ); | |
-ok ( my $row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ), | |
- "assetId exists in assetIndex" | |
-); | |
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ); | |
cmp_deeply ( | |
$row, | |
{ | |
@@ -224,7 +220,35 @@ | |
), | |
lineage => $article->get('lineage'), | |
}, | |
- "Index has correct information" | |
+ "Index has synopsis and description in keywords" | |
); | |
+#---------------------------------------------------------------------------- | |
+# Test that HTML entities are decoded. | |
+$article->update({ | |
+ description => "sch&ouml;n &amp; ca&ntilde;&oacute;n", | |
+}); | |
+$indexer = WebGUI::Search::Index->create( $article ); | |
+ | |
+my $pretty = "schön"; | |
+my $canyon = "cañón"; | |
+ | |
+utf8::upgrade($pretty); | |
+utf8::upgrade($canyon); | |
+ | |
+$row = $db->quickHashRef( "SELECT * FROM assetIndex WHERE assetId=?", [ $article->getId ] ); | |
+my $keywords = $row->{keywords}; | |
+utf8::upgrade($keywords); | |
+cmp_deeply ( | |
+ $row, | |
+ superhashof({ | |
+ keywords => all( # keywords must contain the description text with its HTML entities decoded | |
+ re($pretty), | |
+ re("&"), | |
+ re($canyon), | |
+ ), | |
+ }), | |
+ "Index has decoded entities" | |
+); | |
+ | |
#vim:ft=perl | |
Index: lib/WebGUI/Search/Index.pm | |
=================================================================== | |
--- lib/WebGUI/Search/Index.pm (revision 12178) | |
+++ lib/WebGUI/Search/Index.pm (working copy) | |
@@ -15,6 +15,7 @@ | |
=cut | |
use strict; | |
+use HTML::Entities; | |
=head1 NAME | |
@@ -82,7 +83,6 @@ | |
my $self = shift; | |
my $text = join(" ", @_); | |
- $text = WebGUI::HTML::filter($text, "all"); | |
$text = $self->_filterKeywords($text); | |
my ($keywords) = $self->session->db->quickArray("select keywords from assetIndex where assetId=?",[$self->getId]); | |
$self->session->db->write("update assetIndex set keywords =? where assetId=?", [$keywords.' '.$text, $self->getId]); | |
@@ -166,7 +166,8 @@ | |
=head2 _filterKeywords ( $keywords ) | |
-Perform filtering and cleaning up of the keywords before submitting them. | |
+Perform filtering and cleaning up of the keywords before submitting them. Ideographic characters are padded | |
+so that they are still searchable. HTML entities are decoded. | |
=head3 $keywords | |
@@ -179,6 +180,8 @@ | |
my $keywords = shift; | |
$keywords = WebGUI::HTML::filter($keywords, "all"); | |
+ $keywords = HTML::Entities::decode_entities($keywords); | |
+ utf8::upgrade($keywords); | |
# split into 'words'. Ideographic characters (such as Chinese) are | |
# treated as distinct words. Everything else is space delimited. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment