Skip to content

Instantly share code, notes, and snippets.

@cfitz
Created April 23, 2020 19:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cfitz/12b46314efb51eee1f1d604977cb85de to your computer and use it in GitHub Desktop.
Save cfitz/12b46314efb51eee1f1d604977cb85de to your computer and use it in GitHub Desktop.
import synonyms from './synonyms'
// Terms exempt from stemming; consumed by the `swedish_keywords`
// keyword_marker filter in the index settings.
const swedishKeywords = [
  'senior',
  'island',
]
/**
 * Elasticsearch index definition.
 *
 * Contains three parts:
 *  - `settings.index`: field limit, shard and replica counts.
 *  - `settings.analysis`: normalizers, token/char filters and the two custom
 *    Swedish analyzers referenced by the mappings.
 *  - `mappings._doc`: dynamic templates (applied by field-name pattern) and
 *    the explicitly mapped properties.
 */
const indexConfig = {
  settings: {
    index: {
      'mapping.total_fields.limit': 2200,
      number_of_shards: 1,
      number_of_replicas: 2,
    },
    analysis: {
      // Normalizers used by the keyword mappings further down.
      normalizer: {
        downcased_and_folded: {
          type: 'custom',
          filter: ['lowercase', 'asciifolding'],
        },
        downcased: {
          type: 'custom',
          filter: ['lowercase'],
        },
        // Applies `alpha_numeric_filter` and lowercases; used by `*_sort` fields.
        alpha_numeric: {
          type: 'custom',
          char_filter: ['alpha_numeric_filter'],
          filter: ['lowercase'],
        },
      },
      filter: {
        synonyms: {
          type: 'synonym',
          synonyms,
        },
        // Defined but not referenced by either analyzer below (its entries
        // in the filter chains are commented out).
        swedish_stop: {
          type: 'stop',
          stopwords: '_swedish_',
        },
        swedish_stemmer: {
          type: 'stemmer',
          language: 'swedish',
        },
        // Marks the terms in `swedishKeywords` so the stemmer skips them.
        swedish_keywords: {
          type: 'keyword_marker',
          keywords: swedishKeywords,
        },
        '22_char_hyphenation_decompound': {
          type: 'hyphenation_decompounder',
          hyphenation_patterns_path: '/elasticsearch/config/analysis/se.xml',
          word_list_path: '/elasticsearch/config/analysis/swedish_words.txt',
          max_subword_size: 22,
          min_subword_size: 4,
        },
      },
      char_filter: {
        // Removes every character that is not a letter (incl. ÅÄÖ/åäö),
        // a digit or a space.
        alpha_numeric_filter: {
          type: 'pattern_replace',
          pattern: '[^A-ZÅÄÖa-zåäö0-9 ]',
          replacement: '',
        },
      },
      analyzer: {
        // Used as `search_analyzer` by the text mappings below.
        custom_swedish: {
          tokenizer: 'standard',
          filter: [
            'lowercase',
            // 'swedish_stop',
            'swedish_keywords',
            'swedish_stemmer',
            'synonyms',
          ],
        },
        // Same chain as `custom_swedish` plus compound-word splitting; used
        // as the index-time `analyzer` by the text mappings below.
        swedish_decompounder: {
          tokenizer: 'standard',
          filter: [
            'lowercase',
            // 'swedish_stop',
            'swedish_keywords',
            'swedish_stemmer',
            'synonyms',
            '22_char_hyphenation_decompound',
          ],
        },
      },
    },
  },
  mappings: {
    _doc: {
      // NOTE(review): several regex templates below anchor only the first
      // and last alternative (e.g. '^(a)|(b)$'). This is harmless only if
      // Elasticsearch applies the pattern as a whole-name match — confirm.
      dynamic_templates: [
        {
          bag_of_dates: {
            match: '*_time',
            mapping: { type: 'date' },
          },
        },
        {
          bag_of_sort_dates: {
            match: '*_date',
            mapping: { type: 'date' },
          },
        },
        {
          // Uses `path_match` because the pattern contains a dot: `match` is
          // tested against the leaf field name only, so a dotted pattern
          // there would never apply. Restricted to string values so that
          // object-valued children of `files` are not forced to `keyword`.
          files: {
            path_match: 'files.*',
            match_mapping_type: 'string',
            mapping: { type: 'keyword' },
          },
        },
        {
          swedish: {
            path_match: '*_sv',
            mapping: {
              type: 'text',
              analyzer: 'swedish_decompounder',
              search_analyzer: 'custom_swedish',
              search_quote_analyzer: 'simple',
            },
          },
        },
        {
          normalized: {
            path_match: '*_normalized',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased_and_folded',
            },
          },
        },
        {
          downcased: {
            path_match: '*_downcased',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased',
            },
          },
        },
        {
          // Facet fields: stored as keyword and copied into the
          // normalized / downcased / Swedish-analyzed variants.
          facets: {
            match_pattern: 'regex',
            match: '^(language_level)|(keywords)|(subject_tree)|(categories)$',
            mapping: {
              type: 'keyword',
              copy_to: ['{name}_normalized', '{name}_downcased', '{name}_sv'],
            },
          },
        },
        {
          // Title-like fields: analyzed text, also copied to a sortable
          // variant and to the combined `all_titles` field.
          titles: {
            match_pattern: 'regex',
            match: '(.*title)|(usp)$',
            mapping: {
              type: 'text',
              analyzer: 'swedish_decompounder',
              search_analyzer: 'custom_swedish',
              search_quote_analyzer: 'simple',
              copy_to: ['{name}_sort', 'all_titles'],
            },
          },
        },
        {
          sorts: {
            path_match: '*_sort',
            mapping: { type: 'keyword', normalizer: 'alpha_numeric' },
          },
        },
        {
          languages: {
            match_pattern: 'regex',
            match:
              '^(languages)|(languages_translations)|(subtitle_languages)$',
            mapping: {
              type: 'keyword',
              copy_to: [
                '{name}_normalized',
                '{name}_downcased',
                '{name}_sv',
                'all_languages_and_codes_normalized',
              ],
            },
          },
        },
      ],
      properties: {
        // NOTE(review): `swedish` is not defined under settings.analysis
        // above (presumably Elasticsearch's built-in analyzer), whereas
        // `seo_description` uses the custom chain — confirm this is intended.
        description: { type: 'text', analyzer: 'swedish' },
        episode_number: { type: 'integer' },
        number_of_episodes: { type: 'integer' },
        duration: { type: 'integer' },
        product_type: { type: 'keyword' },
        format: { type: 'keyword' },
        typical_age_range: { type: 'keyword' },
        has_teacher_resource: { type: 'boolean' },
        published_at: { type: 'date' },
        subject_tree_raw: {
          type: 'keyword',
          normalizer: 'downcased_and_folded',
        },
        // NOTE(review): the copy_to targets below carry no `participants.`
        // prefix — verify they resolve to the intended fields.
        participants: {
          type: 'nested',
          properties: {
            firstname: {
              type: 'keyword',
              copy_to: ['firstname_sv', 'fullname'],
            },
            lastname: {
              type: 'keyword',
              copy_to: ['lastname_sv', 'fullname'],
            },
            fullname: { type: 'keyword', copy_to: 'fullname_sv' },
            role: { type: 'keyword' },
            profession: { type: 'keyword' },
          },
        },
        main_genre: { type: 'keyword' },
        emotional_tags: { type: 'keyword' },
        modified: { type: 'date' },
        subtitle_languages: { type: 'keyword' },
        image: { type: 'object', enabled: false },
        age_ranges: {
          type: 'nested',
          properties: {
            from: { type: 'integer' },
            to: { type: 'integer' },
          },
        },
        accessible_platforms: {
          type: 'nested',
          dynamic: true,
        },
        platforms: { type: 'keyword' },
        broadcasts: {
          type: 'nested',
          dynamic: true,
          properties: {
            platform: { type: 'keyword' },
          },
        },
        files: {
          type: 'nested',
          dynamic: true,
        },
        pod_info: {
          type: 'object',
          properties: {
            is_downloadable: { type: 'boolean' },
            file: { type: 'keyword' },
            filename: { type: 'keyword' },
            format: { type: 'keyword' },
          },
        },
        sab: { type: 'keyword' },
        streaming_info: {
          type: 'object',
          enabled: false,
        },
        seo_description: {
          type: 'text',
          analyzer: 'swedish_decompounder',
          search_analyzer: 'custom_swedish',
          search_quote_analyzer: 'simple',
        },
        only_in_sweden: { type: 'boolean' },
        parental_lock: { type: 'boolean' },
        // NOTE(review): key is spelled "laguage"; left unchanged because it
        // must match the field name used by the indexed documents.
        is_sign_laguage_interpreted: { type: 'boolean' },
        is_audio_described: { type: 'boolean' },
        easy_to_read: { type: 'boolean' },
        super_series_id: { type: 'integer' },
        series_id: { type: 'integer' },
        program_ids: { type: 'integer' },
        product_views: { type: 'integer' },
      },
    },
  },
}
export default indexConfig
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment