Created
May 10, 2018 17:07
-
-
Save psychemedia/0e72646590d217968d34bcb313bfa1c0 to your computer and use it in GitHub Desktop.
Simple test of lunr search over lunr-indexed Jupyter notebook markdown cells
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<html><head> | |
<!-- jQuery supplies the `$` used by the inline search script at the bottom of the page. -->
<script type="application/javascript" src="assets/js/jquery-3.3.1.js"></script> | |
<!-- showdown is used by the inline script to turn each matching markdown cell into HTML. -->
<script type="application/javascript" src="assets/js/showdown.min.js"></script> | |
<!-- | |
https://markjs.io/ | |
<script type="application/javascript" src="assets/js/jquery.mark.min.js"></script> | |
--> | |
<!-- lunr.js provides `lunr.Index.load`, used by the inline script to load the prebuilt index. -->
<script type="application/javascript" src="assets/js/lunr.js"></script> | |
<!-- Loaded as a plain script (JSONP style) rather than fetched as JSON; the inline script
     reads the index and document store from the global `response` this file must define. -->
<script src="lunr.jsonp"></script> | |
<link rel="stylesheet" href="assets/css/bootstrap.min.css" /> | |
<style> | |
ul {margin-bottom:50px;} | |
ul li{margin-bottom:50px; background-color: #f8f8f8;} | |
</style> | |
</head> | |
<body> | |
<div class='container' > | |
<div><img src='assets/images/OU_logo_unofficial.png' alt='OU logo' /></div> | |
<!-- `input#search` is the query box the inline script listens to for keyup events. -->
<h1>TM351 Notebook Search</h1><div><input id='search' /></div> | |
<hr/> | |
<!-- `ul#searchresults` is emptied and refilled with one <li> per search hit. -->
<div><ul id='searchresults' style='list-style-type: none'></ul></div> | |
<hr/> | |
<div><em>To refresh the index, ...</em></div></div></body><script type="text/javascript"> | |
//https://matthewdaly.co.uk/blog/2015/04/18/how-i-added-search-to-my-site-with-lunr-dot-js/ | |
$(document).ready(function () {
    'use strict';

    // The index is NOT fetched with $.getJSON('./lunr.json'): to get around
    // CORS issues it is loaded as JSONP via a <script> tag, which is expected
    // to define the global `response` ({index: ..., store: ...}).
    var index = lunr.Index.load(response.index);

    // Lookup from a result's `ref` to {title: notebook path, cell: markdown}.
    var store = response.store;

    // Built once and reused; the original rebuilt a converter for every hit
    // on every keystroke.
    var converter = new showdown.Converter();

    var resultList = $('ul#searchresults');

    /**
     * Run a lunr query, treating a syntactically incomplete query as
     * "no results". The handler fires on every keyup, so half-typed queries
     * are normal and must not raise an uncaught error.
     *
     * @param {string} query Raw text from the search box.
     * @returns {Array} lunr result objects (possibly empty).
     */
    function runSearch(query) {
        try {
            return index.search(query);
        } catch (err) {
            // Only swallow lunr's own parse errors; anything else is a bug.
            if (lunr.QueryParseError && err instanceof lunr.QueryParseError) {
                return [];
            }
            throw err;
        }
    }

    /**
     * Build the <li> for one search hit: the rendered markdown cell followed
     * by a link to the notebook it came from.
     *
     * @param {Object} match A lunr result; only `match.ref` is used.
     * @returns {jQuery} The list item, ready to append.
     */
    function renderResult(match) {
        var doc = store[match.ref];
        // Set href/text through jQuery so a notebook path containing quotes,
        // '&' or '<' cannot break the markup (it was concatenated raw before).
        var link = $('<a>').attr('href', doc.title).text(doc.title);
        return $('<li>')
            .html(converter.makeHtml(doc.cell) + '<br/>Link: ')
            .append(link);
    }

    // Re-run the search on every keystroke in the search box.
    $('input#search').on('keyup', function () {
        var results = runSearch($(this).val());

        if (results.length === 0) {
            resultList.hide();
            return;
        }

        resultList.empty();
        // forEach instead of for...in: for...in walks enumerable property
        // names (including inherited ones) and is not meant for arrays.
        results.forEach(function (match) {
            resultList.append(renderResult(match));
        });

        // TODO: search-term highlighting via mark.js (resultList.mark(query))
        // used to work but is currently disabled.
        resultList.show();
    });
});
</script> | |
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import nbformat | |
from lunr import lunr | |
import json | |
def nbpathwalk(path):
    """Yield the path of every ``.ipynb`` notebook file found beneath *path*.

    Any directory whose path contains ``.ipynb_checkpoints`` is skipped, so
    Jupyter's checkpoint copies are never yielded.

    Args:
        path: Directory to start walking from.

    Yields:
        ``os.path.join(directory, filename)`` for each file whose name ends
        with ``.ipynb``. Directories and files are visited in sorted order,
        so the sequence is reproducible from run to run.
    """
    marker = '.ipynb_checkpoints'

    # A start path that is itself inside a checkpoint directory yields
    # nothing (every directory below it would contain the marker too).
    if marker in os.fspath(path):
        return

    for dirpath, dirnames, filenames in os.walk(path):
        # Prune in place: os.walk (top-down) will not descend into directories
        # removed from `dirnames`, so checkpoint trees are never traversed.
        dirnames[:] = sorted(d for d in dirnames if marker not in d)
        for filename in sorted(filenames):
            if filename.endswith('.ipynb'):
                yield os.path.join(dirpath, filename)
def get_md(nb_fn, c_md=None):
    """Collect the markdown cells of one notebook onto a running list.

    Each markdown cell read from *nb_fn* is tagged in place with two extra
    keys: ``"ix"``, its position in the combined list as a string, and
    ``"title"``, the notebook filename it came from.

    Args:
        nb_fn: Notebook file to read (passed straight to ``nbformat.read``).
        c_md: Cells gathered from earlier notebooks, or ``None`` to start
            a fresh list. Numbering continues from ``len(c_md)``.

    Returns:
        A new list: the cells in *c_md* followed by this notebook's markdown
        cells. The list passed in is not modified.
    """
    collected = [] if c_md is None else c_md

    # Read the notebook as stored, without converting its format version.
    notebook = nbformat.read(nb_fn, nbformat.NO_CONVERT)
    markdown_cells = [
        cell for cell in notebook.cells if cell['cell_type'] == 'markdown'
    ]

    # Continue numbering after the cells that were already collected.
    for position, cell in enumerate(markdown_cells, start=len(collected)):
        cell.update({"ix": str(position)})
        cell.update({"title": nb_fn})

    return collected + markdown_cells
def index_notebooks(nbpath='.', outfile='lunr.json', jsonp=None):
    """Build a lunr index over the markdown cells of all notebooks under a path.

    The output holds the serialized index under ``'index'`` and, under
    ``'store'``, a lookup from each cell's ``ix`` to its notebook title and
    markdown source.

    Args:
        nbpath: Directory searched for notebooks.
        outfile: File the result is written to.
        jsonp: Name of the JavaScript variable to assign the data to. When
            ``None``, plain JSON is written unless *outfile* ends with
            ``.jsonp``, in which case the variable is named ``response``.
    """
    cells = []
    for notebook_path in nbpathwalk(nbpath):
        cells = get_md(notebook_path, cells)

    idx = lunr(ref='ix', fields=('title', 'source'), documents=cells)

    # Per-cell lookup so the search page can display a hit from its ref.
    store = {
        cell['ix']: {'title': cell['title'], 'cell': cell['source']}
        for cell in cells
    }
    payload = {'index': idx.serialize(), 'store': store}

    with open(outfile, 'w') as f:
        # Plain JSON only when no variable name was requested and the
        # filename does not ask for JSONP.
        if jsonp is None and not outfile.endswith('.jsonp'):
            json.dump(payload, f)
            return

        if jsonp is None:
            prefix = "var response = "
        else:
            prefix = "var {} = ".format(jsonp)
        f.write(prefix + json.dumps(payload))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment