Created
April 23, 2020 19:22
-
-
Save cfitz/12b46314efb51eee1f1d604977cb85de to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import synonyms from './synonyms'
// Words that must bypass stemming; consumed by the `swedish_keywords`
// keyword_marker token filter in the index settings.
const swedishKeywords = ['senior', 'island']
// Token filters applied, in this order, by both Swedish analyzers.
// 'swedish_keywords' (a keyword_marker) is placed ahead of 'swedish_stemmer'.
// NOTE(review): the 'swedish_stop' filter is defined in the settings below but
// is not part of this chain (it was commented out in both analyzers) - confirm
// that stop-word removal is meant to stay disabled.
const swedishTokenFilters = [
  'lowercase',
  'swedish_keywords',
  'swedish_stemmer',
  'synonyms',
]

// Builds a fresh text mapping that is indexed with 'swedish_decompounder',
// searched with 'custom_swedish' and uses 'simple' for quoted phrases.
// Keys in `extra` are appended after the analyzer settings.
const swedishText = (extra = {}) => ({
  type: 'text',
  analyzer: 'swedish_decompounder',
  search_analyzer: 'custom_swedish',
  search_quote_analyzer: 'simple',
  ...extra,
})

// Returns a fresh list of the copy_to targets shared by the facet and language
// templates. The suffixes line up with the '*_normalized', '*_downcased' and
// '*_sv' dynamic templates declared in the mappings.
const derivedFieldTargets = () => [
  '{name}_normalized',
  '{name}_downcased',
  '{name}_sv',
]

/**
 * Elasticsearch index definition: index-level settings, the custom analysis
 * chain (normalizers, token filters, char filters, analyzers) and the `_doc`
 * mappings (dynamic templates followed by explicitly typed properties).
 */
const indexConfig = {
  settings: {
    index: {
      'mapping.total_fields.limit': 2200,
      number_of_shards: 1,
      number_of_replicas: 2,
    },
    analysis: {
      // Normalizers are referenced by keyword fields in the mappings.
      normalizer: {
        downcased_and_folded: {
          type: 'custom',
          filter: ['lowercase', 'asciifolding'],
        },
        downcased: {
          type: 'custom',
          filter: ['lowercase'],
        },
        alpha_numeric: {
          type: 'custom',
          char_filter: ['alpha_numeric_filter'],
          filter: ['lowercase'],
        },
      },
      filter: {
        synonyms: {
          type: 'synonym',
          synonyms,
        },
        swedish_stop: {
          type: 'stop',
          stopwords: '_swedish_',
        },
        swedish_stemmer: {
          type: 'stemmer',
          language: 'swedish',
        },
        swedish_keywords: {
          type: 'keyword_marker',
          keywords: swedishKeywords,
        },
        // NOTE(review): both paths are absolute; presumably the files must be
        // present on every Elasticsearch node - verify against the deployment.
        '22_char_hyphenation_decompound': {
          type: 'hyphenation_decompounder',
          hyphenation_patterns_path: '/elasticsearch/config/analysis/se.xml',
          word_list_path: '/elasticsearch/config/analysis/swedish_words.txt',
          max_subword_size: 22,
          min_subword_size: 4,
        },
      },
      char_filter: {
        // Removes every character that is not a letter (A-Z, ÅÄÖ in either
        // case), a digit or a space.
        alpha_numeric_filter: {
          type: 'pattern_replace',
          pattern: '[^A-ZÅÄÖa-zåäö0-9 ]',
          replacement: '',
        },
      },
      analyzer: {
        custom_swedish: {
          tokenizer: 'standard',
          filter: [...swedishTokenFilters],
        },
        // Same chain as custom_swedish, with compound-word splitting appended.
        swedish_decompounder: {
          tokenizer: 'standard',
          filter: [...swedishTokenFilters, '22_char_hyphenation_decompound'],
        },
      },
    },
  },
  mappings: {
    _doc: {
      dynamic_templates: [
        {
          bag_of_dates: {
            match: '*_time',
            mapping: { type: 'date' },
          },
        },
        {
          bag_of_sort_dates: {
            match: '*_date',
            mapping: { type: 'date' },
          },
        },
        {
          // NOTE(review): `match` is compared with the field name rather than
          // the dotted path, so 'files.*' may never apply; `path_match` might
          // be what was intended - confirm before changing.
          files: {
            match: 'files.*',
            mapping: { type: 'keyword' },
          },
        },
        {
          swedish: {
            path_match: '*_sv',
            mapping: swedishText(),
          },
        },
        {
          normalized: {
            path_match: '*_normalized',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased_and_folded',
            },
          },
        },
        {
          downcased: {
            path_match: '*_downcased',
            mapping: {
              type: 'keyword',
              normalizer: 'downcased',
            },
          },
        },
        {
          facets: {
            match_pattern: 'regex',
            // The alternation is grouped so that ^ and $ anchor every
            // alternative, not only the first and the last one.
            match: '^(language_level|keywords|subject_tree|categories)$',
            mapping: {
              type: 'keyword',
              copy_to: derivedFieldTargets(),
            },
          },
        },
        {
          titles: {
            match_pattern: 'regex',
            // Any field name ending in "title", or exactly "usp".
            match: '^(.*title|usp)$',
            mapping: swedishText({ copy_to: ['{name}_sort', 'all_titles'] }),
          },
        },
        {
          sorts: {
            path_match: '*_sort',
            mapping: { type: 'keyword', normalizer: 'alpha_numeric' },
          },
        },
        {
          languages: {
            match_pattern: 'regex',
            // Grouped for the same anchoring reason as the facets template.
            match: '^(languages|languages_translations|subtitle_languages)$',
            mapping: {
              type: 'keyword',
              copy_to: [
                ...derivedFieldTargets(),
                'all_languages_and_codes_normalized',
              ],
            },
          },
        },
      ],
      properties: {
        // Uses the analyzer named 'swedish', which is not one of the custom
        // analyzers declared in the settings above.
        description: { type: 'text', analyzer: 'swedish' },
        episode_number: { type: 'integer' },
        number_of_episodes: { type: 'integer' },
        duration: { type: 'integer' },
        product_type: { type: 'keyword' },
        format: { type: 'keyword' },
        typical_age_range: { type: 'keyword' },
        has_teacher_resource: { type: 'boolean' },
        published_at: { type: 'date' },
        subject_tree_raw: {
          type: 'keyword',
          normalizer: 'downcased_and_folded',
        },
        // NOTE(review): the copy_to targets below are written without the
        // 'participants.' prefix; confirm they resolve to the intended fields.
        participants: {
          type: 'nested',
          properties: {
            firstname: {
              type: 'keyword',
              copy_to: ['firstname_sv', 'fullname'],
            },
            lastname: {
              type: 'keyword',
              copy_to: ['lastname_sv', 'fullname'],
            },
            fullname: { type: 'keyword', copy_to: 'fullname_sv' },
            role: { type: 'keyword' },
            profession: { type: 'keyword' },
          },
        },
        main_genre: { type: 'keyword' },
        emotional_tags: { type: 'keyword' },
        modified: { type: 'date' },
        subtitle_languages: { type: 'keyword' },
        image: { type: 'object', enabled: false },
        age_ranges: {
          type: 'nested',
          properties: {
            from: { type: 'integer' },
            to: { type: 'integer' },
          },
        },
        accessible_platforms: {
          type: 'nested',
          dynamic: true,
        },
        platforms: { type: 'keyword' },
        broadcasts: {
          type: 'nested',
          dynamic: true,
          properties: {
            platform: { type: 'keyword' },
          },
        },
        files: {
          type: 'nested',
          dynamic: true,
        },
        pod_info: {
          type: 'object',
          properties: {
            is_downloadable: { type: 'boolean' },
            file: { type: 'keyword' },
            filename: { type: 'keyword' },
            format: { type: 'keyword' },
          },
        },
        sab: { type: 'keyword' },
        streaming_info: {
          type: 'object',
          enabled: false,
        },
        seo_description: swedishText(),
        only_in_sweden: { type: 'boolean' },
        parental_lock: { type: 'boolean' },
        // The spelling ("laguage") is kept on purpose: this key is the indexed
        // field name, so renaming it would change the document contract.
        is_sign_laguage_interpreted: { type: 'boolean' },
        is_audio_described: { type: 'boolean' },
        easy_to_read: { type: 'boolean' },
        super_series_id: { type: 'integer' },
        series_id: { type: 'integer' },
        program_ids: { type: 'integer' },
        product_views: { type: 'integer' },
      },
    },
  },
}

export default indexConfig
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment