Create a gist now

Instantly share code, notes, and snippets.

This patch enables a user dictionary for the custom Japanese analyzer (lucene-kuromoji) in Confluence. The user dictionary is read from <confluence.home>/config/userdict_ja.txt. The patch is compatible with Confluence 5.4.4 and Confluence 5.5.
diff --git confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/ConfluenceAnalyzer.java confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/ConfluenceAnalyzer.java
index a4cc059..32fdd1c 100644
--- confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/ConfluenceAnalyzer.java
+++ confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/ConfluenceAnalyzer.java
@@ -12,7 +12,7 @@ import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
-import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
+import com.atlassian.confluence.search.lucene.analyzers.ConfluenceJapaneseAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -31,7 +31,7 @@ public class ConfluenceAnalyzer extends AnalyzerWrapper
analyzers.put(Settings.GERMAN, new GermanAnalyzer(BonnieConstants.LUCENE_VERSION));
analyzers.put(Settings.CHINESE, new StandardAnalyzer(BonnieConstants.LUCENE_VERSION));
analyzers.put(Settings.CJK, new CJKAnalyzer(BonnieConstants.LUCENE_VERSION));
- analyzers.put(Settings.CUSTOM_JAPANESE, new JapaneseAnalyzer(BonnieConstants.LUCENE_VERSION));
+ analyzers.put(Settings.CUSTOM_JAPANESE, new ConfluenceJapaneseAnalyzer(BonnieConstants.LUCENE_VERSION));
analyzers.put(Settings.FRENCH, new FrenchAnalyzer(BonnieConstants.LUCENE_VERSION));
analyzers.put(Settings.BRAZILIAN, new BrazilianAnalyzer(BonnieConstants.LUCENE_VERSION));
analyzers.put(Settings.CZECH, new CzechAnalyzer(BonnieConstants.LUCENE_VERSION));
diff --git confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/analyzers/ConfluenceJapaneseAnalyzer.java confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/analyzers/ConfluenceJapaneseAnalyzer.java
new file mode 100644
index 0000000..e89f568
--- /dev/null
+++ confluence-project/confluence-core/confluence/src/java/com/atlassian/confluence/search/lucene/analyzers/ConfluenceJapaneseAnalyzer.java
@@ -0,0 +1,77 @@
+package com.atlassian.confluence.search.lucene.analyzers;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import com.atlassian.config.bootstrap.AtlassianBootstrapManager;
+
+import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
+import org.apache.lucene.analysis.ja.JapaneseTokenizer;
+import org.apache.lucene.analysis.ja.dict.UserDictionary;
+import org.apache.lucene.util.Version;
+
+/**
+ * Analyzer for Japanese that uses morphological analysis with an optional UserDictionary.
+ * <p>
+ * The dictionary is read once, at class initialisation, from
+ * {@code <application home>/config/userdict_ja.txt} and decoded as UTF-8. When the
+ * file is missing or cannot be read, {@code null} is passed to the superclass instead.
+ */
+public class ConfluenceJapaneseAnalyzer extends JapaneseAnalyzer
+{
+    private static final Logger LOG = Logger.getLogger(ConfluenceJapaneseAnalyzer.class.getName());
+
+    /** Shared by every instance; {@code null} when no user dictionary could be loaded. */
+    private static final UserDictionary userDict = loadUserDictionary();
+
+    /**
+     * Reads the user dictionary from the configured application home directory.
+     *
+     * @return the parsed dictionary, or {@code null} when the home directory is unknown,
+     *         the file does not exist, or reading it fails with an {@link IOException}
+     */
+    private static UserDictionary loadUserDictionary() {
+        // NOTE(review): null guards added defensively; confirm whether the bootstrap
+        // manager or the configured home can actually be null at this point.
+        AtlassianBootstrapManager bootstrapManager = com.atlassian.config.util.BootstrapUtils.getBootstrapManager();
+        String home = bootstrapManager == null ? null : bootstrapManager.getConfiguredApplicationHome();
+        if (home == null) {
+            // Without a home directory there is nowhere to look for the dictionary.
+            return null;
+        }
+        File userDictFile = new File(home + File.separator + "config" + File.separator + "userdict_ja.txt");
+        if (!userDictFile.isFile()) {
+            // The dictionary is optional, so a missing file is not an error.
+            return null;
+        }
+        InputStream in = null;
+        try {
+            in = new FileInputStream(userDictFile);
+            // Explicit charset: FileReader would decode with the platform default encoding.
+            return new UserDictionary(new InputStreamReader(in, "UTF-8"));
+        } catch (IOException e) {
+            LOG.log(Level.WARNING, "Could not read Japanese user dictionary " + userDictFile, e);
+            return null;
+        } finally {
+            if (in != null) {
+                try {
+                    in.close();
+                } catch (IOException ignored) {
+                    // Nothing useful can be done if closing the stream fails.
+                }
+            }
+        }
+    }
+
+    /**
+     * Creates the analyzer with the user dictionary loaded at class initialisation and
+     * the default tokenizer mode, stop set and stop tags of {@link JapaneseAnalyzer}.
+     *
+     * @param version Lucene version passed through to the superclass constructor
+     */
+    public ConfluenceJapaneseAnalyzer (final Version version) {
+        super(version, userDict, JapaneseTokenizer.DEFAULT_MODE, JapaneseAnalyzer.getDefaultStopSet(), JapaneseAnalyzer.getDefaultStopTags());
+    }
+}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment