Skip to content

Instantly share code, notes, and snippets.

@KANATAKA
Forked from fjnkt98/kuromoji.gradle.patch
Last active March 29, 2023 09:49
Show Gist options
  • Save KANATAKA/7cf1c8f4d39f135034fcb38496b83690 to your computer and use it in GitHub Desktop.
Save KANATAKA/7cf1c8f4d39f135034fcb38496b83690 to your computer and use it in GitHub Desktop.
diff --git a/gradle/generation/kuromoji.gradle b/gradle/generation/kuromoji.gradle
index 5754bd6..3c8aab9 100644
--- a/gradle/generation/kuromoji.gradle
+++ b/gradle/generation/kuromoji.gradle
@@ -47,91 +47,67 @@ configure(project(":lucene:analysis:kuromoji")) {
}
}
- task compileMecab(type: Download) {
- description "Recompile dictionaries from Mecab data."
+ task compileUnidic(type: Download) {
+ description "Recompile dictionaries from UniDic data."
group "generation"
dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath
- def dictionaryName = "mecab-ipadic-2.7.0-20070801"
- def dictionarySource = "https://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/${dictionaryName}.tar.gz"
- def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
+ def dictionaryVersion = "2.1.2"
+ def dictionaryName = "unidic-mecab-${dictionaryVersion}_src"
+ def dictionarySource = "https://clrd.ninjal.ac.jp/unidic_archive/cwj/${dictionaryVersion}/${dictionaryName}.zip"
+ def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.zip")
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
src dictionarySource
dest dictionaryFile
- onlyIfModified true
+ onlyIfModified false
doLast {
// Unpack the downloaded archive.
delete unpackedDir
- ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") {
+ ant.unzip(src: dictionaryFile, dest: unpackedDir) {
ant.cutdirsmapper(dirs: "1")
}
- // Apply patch via local git.
- project.quietExec {
- workingDir = unpackedDir
- // TODO: Uwe says: better use jgit to apply patch, this is not portable!!!
- // Dawid answers: (LUCENE-10215) jgit's patch was broken - would not handle binary patches
- // Seems like this has been fixed: https://gerrit.googlesource.com/jgit/+/10ac4499115965ff10e547a0632c89873a06cf91
- executable "git"
- args += [
- "apply",
- file("src/tools/patches/Noun.proper.csv.patch").absolutePath
- ]
- }
-
// Compile the dictionary
recompileDictionary(project, dictionaryName, {
args += [
- "ipadic",
+ "unidic",
unpackedDir,
targetDir,
- "euc-jp",
+ "utf-8",
false
]
})
}
}
- task compileNaist(type: Download) {
- description "Recompile dictionaries from Naist data."
+ task recompileUnidic() {
+ description "Recompile dictionaries from previously downloaded and unpacked UniDic data."
group "generation"
- dependsOn deleteDictionaryData
dependsOn sourceSets.main.runtimeClasspath
- def dictionaryName = "mecab-naist-jdic-0.6.3b-20111013"
- def dictionarySource = "https://rwthaachen.dl.osdn.jp/naist-jdic/53500/${dictionaryName}.tar.gz"
- def dictionaryFile = file("${buildDir}/generate/${dictionaryName}.tar.gz")
+ def dictionaryVersion = "2.1.2"
+ def dictionaryName = "unidic-mecab-${dictionaryVersion}_src"
def unpackedDir = file("${buildDir}/generate/${dictionaryName}")
- src dictionarySource
- dest dictionaryFile
- onlyIfModified true
-
doLast {
- // Unpack the downloaded archive.
- delete unpackedDir
- ant.untar(src: dictionaryFile, dest: unpackedDir, compression: "gzip") {
- ant.cutdirsmapper(dirs: "1")
- }
-
// Compile the dictionary
recompileDictionary(project, dictionaryName, {
args += [
- "ipadic",
+ "unidic",
unpackedDir,
targetDir,
- "euc-jp",
+ "utf-8",
false
]
})
}
}
- regenerate.dependsOn compileMecab
+ regenerate.dependsOn compileUnidic
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment