-
-
Save KANATAKA/7cf1c8f4d39f135034fcb38496b83690 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle | |
index 5754bd6..3c8aab9 100644 | |
--- a/gradle/generation/kuromoji.gradle | |
+++ b/gradle/generation/kuromoji.gradle | |
@@ -47,91 +47,67 @@ configure(project(":lucene:analysis:kuromoji")) { | |
} | |
} | |
- task compileMecab(type: Download) { | |
- description "Recompile dictionaries from Mecab data." | |
+ task compileUnidic(type: Download) { | |
+ description "Recompile dictionaries from UniDic data." | |
group "generation" | |
dependsOn deleteDictionaryData | |
dependsOn sourceSets.main.runtimeClasspath | |
- def dictionaryName = "mecab-ipadic-2.7.0-20070801" | |
- def dictionarySource = "https://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/${dictionaryName}.tar.gz" | |
- def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz") | |
+ def dictionaryVersion = "2.1.2" | |
+ def dictionaryName = "unidic-mecab-${dictionaryVersion}_src" | |
+ def dictionarySource = "https://clrd.ninjal.ac.jp/unidic_archive/cwj/${dictionaryVersion}/${dictionaryName}.zip" | |
+ def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.zip") | |
def unpackedDir = file("${buildDir}/generate/${dictionaryName}") | |
src dictionarySource | |
dest dictionaryFile | |
- onlyIfModified true | |
+ onlyIfModified false | |
doLast { | |
// Unpack the downloaded archive. | |
delete unpackedDir | |
- ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") { | |
+ ant.unzip(src: dictionaryFile, dest: unpackedDir) { | |
ant.cutdirsmapper(dirs: "1") | |
} | |
- // Apply patch via local git. | |
- project.quietExec { | |
- workingDir = unpackedDir | |
- // TODO: Uwe says: better use jgit to apply patch, this is not portable!!! | |
- // Dawid answers: (LUCENE-10215) jgit's patch was broken - would not handle binary patches | |
- // Seems like this has been fixed: https://gerrit.googlesource.com/jgit/+/10ac4499115965ff10e547a0632c89873a06cf91 | |
- executable "git" | |
- args += [ | |
- "apply", | |
- file("src/tools/patches/Noun.proper.csv.patch").absolutePath | |
- ] | |
- } | |
- | |
// Compile the dictionary | |
recompileDictionary(project, dictionaryName, { | |
args += [ | |
- "ipadic", | |
+ "unidic", | |
unpackedDir, | |
targetDir, | |
- "euc-jp", | |
+ "utf-8", | |
false | |
] | |
}) | |
} | |
} | |
- task compileNaist(type: Download) { | |
- description "Recompile dictionaries from Naist data." | |
+ task recompileUnidic() { | |
+ description "Recompile dictionaries from UniDic data." | |
group "generation" | |
- dependsOn deleteDictionaryData | |
dependsOn sourceSets.main.runtimeClasspath | |
- def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013" | |
- def dictionarySource = "https://rwthaachen.dl.osdn.jp/naist-jdic/53500/${dictionaryName}.tar.gz" | |
- def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz") | |
+ def dictionaryVersion = "2.1.2" | |
+ def dictionaryName = "unidic-mecab-${dictionaryVersion}_src" | |
def unpackedDir = file("${buildDir}/generate/${dictionaryName}") | |
- src dictionarySource | |
- dest dictionaryFile | |
- onlyIfModified true | |
- | |
doLast { | |
- // Unpack the downloaded archive. | |
- delete unpackedDir | |
- ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") { | |
- ant.cutdirsmapper(dirs: "1") | |
- } | |
- | |
// Compile the dictionary | |
recompileDictionary(project, dictionaryName, { | |
args += [ | |
- "ipadic", | |
+ "unidic", | |
unpackedDir, | |
targetDir, | |
- "euc-jp", | |
+ "utf-8", | |
false | |
] | |
}) | |
} | |
} | |
- regenerate.dependsOn compileMecab | |
+ regenerate.dependsOn compileUnidic | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment