diff --git a/src/common/class.wpes-analyzer-builder.php b/src/common/class.wpes-analyzer-builder.php | |
index 0357ea4..5787479 100644 | |
--- a/src/common/class.wpes-analyzer-builder.php | |
+++ b/src/common/class.wpes-analyzer-builder.php | |
@@ -340,6 +340,29 @@ class WPES_Analyzer_Builder { | |
continue; | |
} | |
+ if ( 'de' == $lang ) { | |
+ //From: http://gibrown.wordpress.com/2013/05/01/three-principles-for-multilingal-indexing-in-elasticsearch/#comment-857 | |
+ $settings['analyzer'][ $config['name'] ]['tokenizer'] = $config['tokenizer']; | |
+ $settings['filter'][$lang . '_stop_filter'] = array( | |
+ 'type' => 'stop', | |
+ 'stopwords' => array( $config['stopwords'] ) | |
+ ); | |
+ $settings['filter'][ $lang . '_stem_filter' ] = array( | |
+ 'type' => 'stemmer', | |
+ 'name' => $config['stemming'] | |
+ ); | |
+ $settings['char_filter'][$lang . '_char_filter'] = array( | |
+ 'type' => "mapping", | |
+ 'mappings' => array( 'ß=>ss', 'Ä=>ae', 'ä=>ae', 'Ö=>oe', 'ö=>oe', 'Ü=>ue', 'ü=>ue', 'ph=>f' ), | |
+ ); | |
+ $settings['analyzer'][ $config['name'] ]['filter'][] = 'icu_normalizer'; | |
+ $settings['analyzer'][ $config['name'] ]['filter'][] = $lang . '_stop_filter'; | |
+ $settings['analyzer'][ $config['name'] ]['filter'][] = $lang . '_stem_filter'; | |
+ $settings['analyzer'][ $config['name'] ]['filter'][] = 'icu_folding'; | |
+ $settings['analyzer'][ $config['name'] ]['char_filter'] = array( $lang . '_char_filter' ); | |
+ continue; | |
+ } | |
+ | |
///////////////////////////////////////////////// | |
//First filter is normalization | |
// normalization needs to be before stopwords so we combine UTF-8 characters (eg ê) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment