Skip to content

Instantly share code, notes, and snippets.

@jmhobbs
Created January 24, 2012 23:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jmhobbs/1673557 to your computer and use it in GitHub Desktop.
Save jmhobbs/1673557 to your computer and use it in GitHub Desktop.
Naive Search with JavaScript
#!/usr/bin/env python
import json
import re
def tokenize ( string ):
    """Split text into lower-cased search tokens.

    Every character other than ASCII letters, digits, whitespace,
    apostrophes and hyphens is removed, and the remainder is split on
    runs of whitespace.

    Returns a list of token strings; it is empty when the input contains
    no token characters.
    """
    # Strip extra punctuation. Whitespace is kept (\s) so that tabs and
    # newlines still separate words instead of being deleted, which would
    # fuse the neighbouring words into one token.
    cleaned = re.sub( r'[^a-z0-9\s\'\-]', '', string.lower() )
    # split() without an argument splits on any whitespace run and never
    # yields empty strings, so repeated or trailing spaces add no '' token.
    return cleaned.split()
def main ():
    """Build index.json, an inverted index over the entries in data.json.

    Reads the 'entries' list from data.json, tokenizes each entry's
    'title' and 'body', and writes a JSON object to index.json that maps
    every token to the list of entry ids whose title or body contains it.
    """
    with open( 'data.json', 'r' ) as handle:
        obj = json.load( handle )

    index = {}
    for entry in obj['entries']:
        # Tokens from both title and body; the set makes sure an entry id
        # is listed at most once per token.
        tokens = set( tokenize( entry['title'] ) )
        tokens.update( tokenize( entry['body'] ) )
        for token in tokens:
            # setdefault creates the id list the first time a token is
            # seen, without scanning the existing keys.
            index.setdefault( token, [] ).append( entry['id'] )

    with open( 'index.json', 'w' ) as handle:
        json.dump( index, handle )
# Build the index only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()
{
"entries": [
{
"id": 1,
"title": "The lazy white cat slept.",
"body": "What a lazy cat."
},
{
"id": 2,
"title": "George, though angry, didn't make a sound.",
"body": "George is a quiet man."
},
{
"id": 3,
"title": "Anyone could see that white didn't suit her.",
"body": "Plus, it's after Labor Day."
},
{
"id": 4,
"title": "By Thor's Hammer, I will have my revenge.",
"body": "Also, by Odin's Eye"
},
{
"id": 5,
"title": "Get off the couch you lazy bum.",
"body": "Yeah, it's way better to sit at a computer desk."
}
]
}
<!doctype html>
<html>
<head>
    <meta charset="utf-8">
    <title>Naive Search with JavaScript</title>
</head>
<body>
    <!-- Query box; the result list below is rebuilt on every keyup. -->
    <input type="text" id="search-terms" />
    <ul id="search-results"></ul>
    <!-- jQuery is fetched over HTTPS: a plain http:// script is blocked as
         mixed content whenever this page itself is served over HTTPS. -->
    <script type="text/javascript" src="https://ajax.googleapis.com/ajax/libs/jquery/1.7.1/jquery.min.js"></script>
    <!-- search.js is expected to define the global Search object used below. -->
    <script type="text/javascript" src="search.js"></script>
    <script type="text/javascript">
        $( function () {
            $( '#search-terms' ).keyup( function () {
                // Tokenize the current query and look the tokens up in the index
                var results = Search.search( Search.tokenize( $(this).val() ) ),
                    $results = $( '#search-results' );
                // Clear current results
                $results.children().remove();
                // Add new ones; .text() inserts each value as plain text, not HTML
                $.each( results, function ( index, element ) {
                    $results.append( $( '<li/>' ).text( element ) );
                } );
            } );
        } );
    </script>
</body>
</html>
{"thor's": [4], "is": [2], "bum": [5], "didn't": [2, 3], "yeah": [5], "see": [3], "computer": [5], "at": [5], "have": [4], "couch": [5], "hammer": [4], "george": [2], "better": [5], "what": [1], "eye": [4], "her": [3], "sit": [5], "it's": [3, 5], "make": [2], "slept": [1], "labor": [3], "anyone": [3], "also": [4], "that": [3], "get": [5], "suit": [3], "odin's": [4], "white": [1, 3], "sound": [2], "lazy": [1, 5], "to": [5], "though": [2], "after": [3], "revenge": [4], "you": [5], "by": [4], "desk": [5], "day": [3], "man": [2], "a": [1, 2, 5], "off": [5], "i": [4], "could": [3], "quiet": [2], "cat": [1], "will": [4], "way": [5], "plus": [3], "the": [1, 5], "my": [4], "angry": [2]}
/**
 * Naive client-side search over a prebuilt inverted index.
 *
 * The index is a JSON object mapping a lower-cased token to the list of
 * entry ids that contain it. It is fetched from index.json by init().
 */
var Search = {
    // jQuery wrapper around the results list; assigned in init()
    $results: null,
    // Not used by any method of this object
    $terms: null,
    // token -> [entry ids]; stays null until index.json has loaded
    index: null,

    /**
     * Cache the results element and start loading index.json.
     */
    init: function () {
        Search.$results = $( '#search-results' );
        // .done()/.fail() replace the deprecated jqXHR .success()/.error()
        // aliases and receive the same callback arguments.
        $.getJSON( 'index.json' )
            .fail( Search.index_load_error )
            .done( Search.index_load_success );
    },

    /**
     * Show an error entry in the results list when the index fails to load.
     */
    index_load_error: function () {
        Search.$results.append( $('<li/>').text( 'Error Loading Index' ) );
    },

    /**
     * Store the loaded index.
     *
     * @param {Object} data - Parsed index.json (token -> array of entry ids).
     */
    index_load_success: function ( data ) {
        Search.index = data;
    },

    /**
     * Look up every term in the index.
     *
     * @param {string[]} terms - Tokens, e.g. the result of tokenize().
     * @returns {Array} Ids of entries matching at least one term, without
     *     duplicates, in order of first appearance. Empty when the index
     *     has not been loaded.
     */
    search: function ( terms ) {
        var matches = [],
            seen = {},
            hasOwn = Object.prototype.hasOwnProperty,
            i, j, ids;

        // The index may still be loading, or may have failed to load.
        if( !Search.index || !terms ) {
            return matches;
        }

        // For each term passed in, check it in the index
        for( i = 0; i < terms.length; ++i ) {
            // Own properties only: a query such as "constructor" must not
            // match members inherited from Object.prototype.
            if( !hasOwn.call( Search.index, terms[i] ) ) {
                continue;
            }
            ids = [].concat( Search.index[terms[i]] );
            // Append the ids, skipping any that are already in the result
            for( j = 0; j < ids.length; ++j ) {
                if( hasOwn.call( seen, ids[j] ) ) {
                    continue;
                }
                seen[ids[j]] = 1;
                matches.push( ids[j] );
            }
        }

        // Return the list of matches
        return matches;
    },

    /**
     * Split a query string into lower-cased tokens.
     *
     * Every character other than ASCII letters, digits, whitespace,
     * apostrophes and hyphens is removed, and the remainder is split on
     * runs of whitespace.
     *
     * @param {string} string - Raw query text.
     * @returns {string[]} Tokens; empty when the query has no token characters.
     */
    tokenize: function ( string ) {
        // The g flag matters: without it only the FIRST punctuation
        // character is removed and e.g. "world!" never matches "world".
        var cleaned = string.toLowerCase().replace( /[^a-z0-9\s'\-]/g, '' );
        // Trim so that leading/trailing whitespace produces no empty token
        cleaned = cleaned.replace( /^\s+|\s+$/g, '' );
        if( cleaned === '' ) {
            return [];
        }
        return cleaned.split( /\s+/ );
    }
};
// Passing a function to $() registers it as a DOM-ready handler, so
// Search.init runs once the document has been parsed.
$(Search.init);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment