Skip to content

Instantly share code, notes, and snippets.

@yannvery
Created March 25, 2015 17:44
Show Gist options
  • Save yannvery/2f5c1b84c96fa463aebf to your computer and use it in GitHub Desktop.
Save yannvery/2f5c1b84c96fa463aebf to your computer and use it in GitHub Desktop.
Default Solr field type for lemmatization/stemming
<!--
  "text" field type: full-text analysis chain for French content
  (analyzer configured for French by Gaël).

  The <analyzer> element carries no type attribute, so the same chain is
  used at both index time and query time. Filters run in document order;
  several steps depend on that order (see the per-filter notes).

  External resources referenced: elisionwords.txt, stopwords.txt,
  synonyms.txt, protwords.txt.
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <analyzer>
    <!-- strip any HTML markup before tokenizing -->
    <charFilter class="solr.HTMLStripCharFilterFactory"/>

    <!-- split on whitespace only -->
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>

    <!-- strip leading and trailing punctuation from each token; only the
         lazily-matched middle group ($2) is kept -->
    <filter class="solr.PatternReplaceFilterFactory"
            pattern="^(\p{Punct}*)(.*?)(\p{Punct}*)$"
            replacement="$2"/>

    <!-- keep only tokens of 3 to 100 characters: drops the empty tokens left
         by the previous step, very short tokens, and oversized tokens -->
    <filter class="solr.LengthFilterFactory" min="3" max="100"/>

    <!-- lowercase everything -->
    <filter class="solr.LowerCaseFilterFactory"/>

    <!-- remove elided articles (l', qu', ...) listed in elisionwords.txt -->
    <filter class="solr.ElisionFilterFactory" articles="elisionwords.txt"/>

    <!-- split compound words; the original token and the catenated forms
         are emitted alongside the parts.
         NOTE(review): tokens are already lowercased by the
         LowerCaseFilterFactory above, so splitOnCaseChange cannot find a
         case transition at this point. If case-based splitting is wanted,
         the lowercase filter has to move after this filter (and the elision
         filter's case handling must then be checked). -->
    <filter class="solr.WordDelimiterFilterFactory"
            splitOnCaseChange="1"
            splitOnNumerics="1"
            stemEnglishPossessive="1"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="1"
            preserveOriginal="1"/>

    <!-- remove stop words listed in stopwords.txt.
         ignoreCase is a boolean attribute and must be spelled true/false:
         Solr parses it as a Java boolean, so a value such as "1" is read
         as false. -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>

    <!-- synonym handling, driven by synonyms.txt -->
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>

    <!-- French Snowball stemming (plurals, ...); terms listed in
         protwords.txt are protected from stemming -->
    <filter class="solr.SnowballPorterFilterFactory"
            language="French"
            protected="protwords.txt"/>

    <!-- fold accents, cedillas, the oe ligature, ... to ASCII; placed after
         the stemmer, so the stemmer still sees the accented forms -->
    <filter class="solr.ASCIIFoldingFilterFactory"/>

    <!-- drop any duplicate tokens produced by the steps above -->
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
  </analyzer>
</fieldType>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment