Skip to content

Instantly share code, notes, and snippets.

@panchicore
Last active February 6, 2023 18:49
Show Gist options
  • Save panchicore/d20bed8de919ce772778b06bdb339477 to your computer and use it in GitHub Desktop.
Save panchicore/d20bed8de919ce772778b06bdb339477 to your computer and use it in GitHub Desktop.
https://www.postgresql.org/docs/current/textsearch-controls.html - PostgreSQL - 12.3. Controlling Text Search
create index if not exists idx_text_fts
on public.metadata_texts using gin (to_tsvector('english'::regconfig, text));
SELECT mt.id,
ts_headline('english', mt.text, query, 'MaxWords=10,MinWords=2,MaxFragments=2') as headlines,
ts_rank(to_tsvector(mt.text), query) AS rank
FROM metadata_texts as mt, websearch_to_tsquery('english', 'generative') query
WHERE to_tsvector(mt.text) @@ query
ORDER BY rank DESC
LIMIT 10;
[
{
"id": 7,
"headlines": "recent development in <b>generative</b> AI e.g. chatGPT, the public ... opportunity with this new <b>generative</b> AI, some of the risks",
"rank": 0.08654518
}
]
/**
* A set of functions called "actions" for `search`
*/
function getKeywords(str){
const regex = /\<b>(.+?)\<\/b>/gm;
let m;
let keywords = [];
while ((m = regex.exec(str)) !== null) {
if (m.index === regex.lastIndex) {
regex.lastIndex++;
}
m.forEach((match, groupIndex) => {
if(groupIndex > 0 && !keywords.includes(match)){
keywords.push(match)
}
});
}
return keywords;
}
export default {
search: async (ctx, next) => {
const {query} = ctx.request.query
try {
const knex = strapi.db.connection;
const matches = await knex.select(
[
'id',
knex.raw(`ts_headline('english', text, query, 'MaxWords=10,MinWords=2,MaxFragments=2') as headlines`),
knex.raw(`ts_rank(to_tsvector(text), query) AS rank`)
]
).fromRaw(
`metadata_texts, websearch_to_tsquery('english', '${query}') query`
).whereRaw(
'to_tsvector(text) @@ query'
).orderBy(
'rank', 'desc'
).limit(
10
)
let results = []
for (let i = 0; i < matches.length; i++) {
results.push(
{data: matches[i], keywords: getKeywords(matches[i].headlines)}
)
}
ctx.body = results;
} catch (err) {
console.log(err)
ctx.body = err;
}
}
};
[
{
"data": {
"id": 7,
"headlines": "recent development in <b>generative</b> AI e.g. chatGPT, the public ... opportunity with this new <b>generative</b> AI, some of the risks",
"rank": 0.08654518
},
"keywords": [
"generative"
]
}
]
'use strict'
module.exports = {
async up(knex) {
// You have full access to the Knex.js API with an already initialized connection to the database
console.log("migrating....", __filename)
const res = await knex.schema.withSchema('public').raw(
`create index if not exists idx_text_fts on public.metadata_texts using gin (to_tsvector('english'::regconfig, text));`
)
console.log(res)
},
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment