Skip to content

Instantly share code, notes, and snippets.

@msaari
Last active April 13, 2018 09:31
Show Gist options
  • Save msaari/fa322fd49b3f0f6a78c59d7fbc42da7b to your computer and use it in GitHub Desktop.
Save msaari/fa322fd49b3f0f6a78c59d7fbc42da7b to your computer and use it in GitHub Desktop.
Simple German stemmer for Relevanssi
<?php
/* Simple German stemmer
A simple suffix stripper that can be used to stem German texts.
*/
// Register relevanssi_simple_german_stemmer() as a callback on the 'relevanssi_stemmer' filter.
add_filter( 'relevanssi_stemmer', 'relevanssi_simple_german_stemmer' );
/**
 * Counts the characters (not bytes) of a UTF-8 string.
 *
 * Uses mb_strlen() when the mbstring extension (or a polyfill) is available;
 * otherwise subtracts the UTF-8 continuation bytes from the byte length.
 *
 * @param string $term The string to measure.
 *
 * @return int Number of characters in $term.
 */
function relevanssi_simple_german_stemmer_length( $term ) {
    if ( function_exists( 'mb_strlen' ) ) {
        return mb_strlen( $term, 'UTF-8' );
    }
    // Every byte in 0x80-0xBF is a continuation byte of a multi-byte character.
    return strlen( $term ) - (int) preg_match_all( '/[\x80-\xBF]/', $term );
}

/**
 * Strips the first matching suffix from a term.
 *
 * Rules are tried in array order. A rule matches when the term ends with the
 * suffix and the term has more characters than the rule's threshold.
 *
 * @param string $term  The term to process.
 * @param array  $rules Map of suffix => length the term must exceed.
 *
 * @return string The term without the matched suffix, or the term unchanged.
 */
function relevanssi_simple_german_stemmer_strip( $term, $rules ) {
    $length = relevanssi_simple_german_stemmer_length( $term );
    foreach ( $rules as $suffix => $min_length ) {
        $suffix_length = strlen( $suffix );
        if ( $length > $min_length && substr( $term, -$suffix_length ) === $suffix ) {
            return substr( $term, 0, -$suffix_length );
        }
    }
    return $term;
}

/**
 * Simple suffix-stripping stemmer for German terms.
 *
 * Processing order:
 *  1. 'ß' is replaced with 'ss'.
 *  2. One of the endings ern/em/er/en/es is removed (term longer than 5
 *     characters), otherwise a final 'e', otherwise a final 's' that follows
 *     one of the consonants in $s_endings.
 *  3. One of est/en/er is removed (length thresholds 5/4/4), otherwise a final
 *     'st' that follows one of the consonants in $st_endings.
 *  4. One of isch/lich/heit/keit/end/ung/ik/ig is removed (thresholds 6/6/6/6/5/5/4/4).
 *  5. The umlauts ä, ö, ü are folded to a, o, u.
 *
 * Length thresholds count characters rather than UTF-8 bytes, so a word with
 * umlauts is treated like its ASCII counterpart ("öfen" and "ofen" both keep
 * their "en"). A term consisting only of "e" is returned as is instead of
 * being reduced to an empty string.
 *
 * NOTE(review): stems of some short words containing umlauts differ from the
 * previous byte-counting behaviour; presumably the search index has to be
 * rebuilt after deploying this - confirm.
 *
 * @param string $term The term to stem. NOTE(review): the lower-case literals
 *                     below suggest a lower-cased UTF-8 term is expected - confirm.
 *
 * @return string The stemmed term.
 */
function relevanssi_simple_german_stemmer( $term ) {
    $s_endings  = array( 'b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 'r', 't' );
    $st_endings = array( 'b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't' );

    $term = str_replace( 'ß', 'ss', $term );

    // Step 1: inflectional endings, then a bare 'e' or 's'.
    $stem = relevanssi_simple_german_stemmer_strip(
        $term,
        array(
            'ern' => 5,
            'em'  => 5,
            'er'  => 5,
            'en'  => 5,
            'es'  => 5,
        )
    );
    if ( $stem === $term ) {
        $last = substr( $term, -1 );
        if ( 'e' === $last && relevanssi_simple_german_stemmer_length( $term ) > 1 ) {
            // The length guard keeps a lone "e" from becoming an empty stem.
            $stem = substr( $term, 0, -1 );
        } elseif ( 's' === $last && in_array( substr( $term, -2, 1 ), $s_endings, true ) ) {
            $stem = substr( $term, 0, -1 );
        }
    }
    $term = $stem;

    // Step 2: est/en/er, then 'st' after a permitted consonant.
    $stem = relevanssi_simple_german_stemmer_strip(
        $term,
        array(
            'est' => 5,
            'en'  => 4,
            'er'  => 4,
        )
    );
    if ( $stem === $term
        && 'st' === substr( $term, -2 )
        && in_array( substr( $term, -3, 1 ), $st_endings, true )
    ) {
        $stem = substr( $term, 0, -2 );
    }
    $term = $stem;

    // Step 3: derivational suffixes.
    $term = relevanssi_simple_german_stemmer_strip(
        $term,
        array(
            'isch' => 6,
            'lich' => 6,
            'heit' => 6,
            'keit' => 6,
            'end'  => 5,
            'ung'  => 5,
            'ik'   => 4,
            'ig'   => 4,
        )
    );

    // Fold umlauts last so the stem is plain a/o/u.
    return str_replace( array( 'ä', 'ö', 'ü' ), array( 'a', 'o', 'u' ), $term );
}
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment