Created
May 24, 2021 09:46
-
-
Save inscapist/6e1314a9a6549d684ea5e7a6a60adcc0 to your computer and use it in GitHub Desktop.
Local website search with elasticlunr and ahocorasick
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* eslint-disable no-undef */ | |
import AhoCorasick from "ahocorasick"; | |
import elasticlunr from "elasticlunr"; | |
import PropTypes from "prop-types"; | |
import queryString from "query-string"; | |
import React, { useState, useEffect } from "react"; | |
import Layout from "../components/layout"; | |
import SEO from "../components/seo"; | |
const SearchContentPage = ({ location }) => { | |
const [query, setQuery] = useState(""); | |
const [results, setResults] = useState([]); | |
const [index, setIndex] = useState(undefined); | |
const extractExcerpt = (sentences, queryTerm) => { | |
const sorted = sentences.sort((a, b) => b.length - a.length); | |
const ac = new AhoCorasick([queryTerm]); | |
let match = sorted.find((sentence) => { | |
const res = ac.search(sentence); | |
if (res.length > 0) { | |
return true; | |
} | |
return false; | |
}); | |
if (match === undefined) { | |
[match] = sentences; | |
} | |
if (match.length > 200) { | |
match = `${match.substring(0, 200)}...`; | |
} | |
return match; | |
}; | |
const extractBreadcrumbs = (docStore, url) => { | |
const breadcrumbs = []; | |
let path = ""; | |
url.split("/").forEach((part) => { | |
path += `/${part}`; | |
if (path.slice(1) in docStore) { | |
breadcrumbs.push({ | |
path, | |
title: docStore[path.slice(1)].title, | |
hasUrl: true, | |
}); | |
} else { | |
breadcrumbs.push({ | |
path, | |
title: part.charAt(0).toUpperCase() + part.slice(1), | |
hasUrl: false, | |
}); | |
} | |
}); | |
return breadcrumbs; | |
}; | |
useEffect(() => { | |
fetch("/searchIndex.json") | |
.then((r) => r.json()) | |
.then((data) => { | |
setIndex(elasticlunr.Index.load(data)); | |
}); | |
}, []); | |
useEffect(() => { | |
const { term: q } = queryString.parse(location.search); | |
setQuery(q); | |
}, [location]); | |
useEffect(() => { | |
if (index === undefined) { | |
return; | |
} | |
const parsedResults = index | |
.search(query, { | |
fields: { | |
title: { boost: 2 }, | |
sentences: { boost: 1 }, | |
}, | |
bool: "OR", | |
expand: true, | |
}) | |
.map(({ ref: url }) => { | |
const { title, sentences } = index.documentStore.getDoc(url); | |
const excerpt = extractExcerpt(sentences, query); | |
const breadcrumbs = extractBreadcrumbs(index.documentStore.docs, url); | |
return { | |
url, | |
title, | |
excerpt, | |
breadcrumbs, | |
}; | |
}); | |
setResults(parsedResults); | |
}, [index, query]); | |
return ( | |
<Layout> | |
<SEO title="Search" /> | |
<div className="content"> | |
<div className="container flex-column mt-64 w-736"> | |
<div>{`${results.length} results for "${query}"`}</div> | |
{results.map(({ url, title, excerpt, breadcrumbs }) => { | |
return ( | |
<div className="mt-32" key={url}> | |
<a href={url}> | |
<div className="font-bold font-blue font-24">{title}</div> | |
</a> | |
<div className="mt-8">{excerpt}</div> | |
<div className="mt-16"> | |
{breadcrumbs | |
.map((crumb) => { | |
if (crumb.hasUrl) { | |
return ( | |
<a href={crumb.path} key={crumb.title}> | |
<span className="font-blue">{crumb.title}</span> | |
</a> | |
); | |
} | |
return ( | |
<span className="font-blue" key={crumb.title}> | |
{crumb.title} | |
</span> | |
); | |
}) | |
.reduce((acc, el) => | |
acc === null ? ( | |
el | |
) : ( | |
<> | |
{acc} | |
<span className="plr-8 font-blue"> > </span> | |
{el} | |
</> | |
) | |
)} | |
</div> | |
</div> | |
); | |
})} | |
</div> | |
</div> | |
</Layout> | |
); | |
}; | |
SearchContentPage.propTypes = { | |
location: PropTypes.shape({ | |
search: PropTypes.string.isRequired, | |
}), | |
}; | |
SearchContentPage.defaultProps = { | |
location: {}, | |
}; | |
export default SearchContentPage; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment