Skip to content

Instantly share code, notes, and snippets.

@gibrown
Created May 23, 2014 20:53
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save gibrown/b984bd41a27580f47f43 to your computer and use it in GitHub Desktop.
diff --git a/src/common/class.wpes-analyzer-builder.php b/src/common/class.wpes-analyzer-builder.php
index 0357ea4..5787479 100644
--- a/src/common/class.wpes-analyzer-builder.php
+++ b/src/common/class.wpes-analyzer-builder.php
@@ -340,6 +340,29 @@ class WPES_Analyzer_Builder {
continue;
}
+ if ( 'de' == $lang ) {
+ // German-specific analyzer: char mapping (ß, umlauts, ph=>f), then stop words and stemming. From: http://gibrown.wordpress.com/2013/05/01/three-principles-for-multilingal-indexing-in-elasticsearch/#comment-857
+ $settings['analyzer'][ $config['name'] ]['tokenizer'] = $config['tokenizer'];
+ $settings['filter'][$lang . '_stop_filter'] = array(
+ 'type' => 'stop',
+ 'stopwords' => array( $config['stopwords'] )
+ );
+ $settings['filter'][ $lang . '_stem_filter' ] = array(
+ 'type' => 'stemmer',
+ 'name' => $config['stemming']
+ );
+ $settings['char_filter'][$lang . '_char_filter'] = array(
+ 'type' => "mapping",
+ 'mappings' => array( 'ß=>ss', 'Ä=>ae', 'ä=>ae', 'Ö=>oe', 'ö=>oe', 'Ü=>ue', 'ü=>ue', 'ph=>f' ),
+ );
+ $settings['analyzer'][ $config['name'] ]['filter'][] = 'icu_normalizer';
+ $settings['analyzer'][ $config['name'] ]['filter'][] = $lang . '_stop_filter';
+ $settings['analyzer'][ $config['name'] ]['filter'][] = $lang . '_stem_filter';
+ $settings['analyzer'][ $config['name'] ]['filter'][] = 'icu_folding';
+ $settings['analyzer'][ $config['name'] ]['char_filter'] = array( $lang . '_char_filter' );
+ continue;
+ }
+
/////////////////////////////////////////////////
//First filter is normalization
// normalization needs to be before stopwords so we combine UTF-8 characters (eg ê)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment