ant build-dict with unidic
<?xml version="1.0"?>
<!--
    Licensed to the Apache Software Foundation (ASF) under one or more
    contributor license agreements. See the NOTICE file distributed with
    this work for additional information regarding copyright ownership.
    The ASF licenses this file to You under the Apache License, Version 2.0
    (the "License"); you may not use this file except in compliance with
    the License. You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
-->
<project name="analyzers-kuromoji" default="default" xmlns:ivy="antlib:org.apache.ivy.ant">
  <description>
    Japanese Morphological Analyzer
  </description>

  <!-- Currently, whether rat detects these files as binary or not
       seems to be platform dependent, so exclude them explicitly. -->
  <property name="rat.excludes" value="**/*.txt,**/bocchan.utf-8"/>

  <!-- We don't want to pull in ipadic/naist etc. -->
  <property name="ivy.default.configuration" value="default"/>

  <!-- These two properties are declared before the import below. -->
  <import file="../analysis-module-build.xml"/>

  <!--
    Dictionary selection. Keep exactly one of the configurations below active.
    dict.is.ipadic enables the download-dict and patch-dict targets;
    dict.is.unidic enables the download-dict-unidic target.
  -->

  <!-- mecab-ipadic configuration (labelled the default, but commented out
       here: the UniDic configuration below is the active one)
  <property name="dict.type" value="ipadic"/>
  <property name="dict.version" value="mecab-ipadic-2.7.0-20070801" />
  <property name="dict.is.ipadic" value="true"/>
  -->

  <!-- alternative configuration: uses mecab-naist-jdic
  <property name="dict.type" value="naist"/>
  <property name="dict.version" value="mecab-naist-jdic-0.6.3b-20111013" />
  <property name="dict.is.ipadic" value="true"/>
  -->

  <!-- active configuration: uses UniDic -->
  <property name="dict.type" value="unidic"/>
  <property name="dict.version" value="unidic-mecab-2.1.2_src"/>
  <property name="dict.src.file" value="${dict.version}.zip" />
  <property name="dict.is.unidic" value="true"/>

  <!-- archive name, encoding and format for ipadic
       (enable together with one of the ipadic/naist configurations above)
  <property name="dict.src.file" value="${dict.version}.tar.gz" />
  <property name="dict.encoding" value="euc-jp"/>
  <property name="dict.format" value="ipadic"/>
  -->

  <!-- encoding and format for UniDic -->
  <property name="dict.encoding" value="utf-8"/>
  <property name="dict.format" value="unidic"/>

  <!-- Directory the downloaded archive is expected to unpack into. -->
  <property name="dict.src.dir" value="${build.dir}/${dict.version}" />
  <!-- Passed through to DictionaryBuilder as its last argument. -->
  <property name="dict.normalize" value="false"/>
  <property name="dict.target.dir" location="${resources.dir}"/>

  <!-- dict.available is set when the unpacked dictionary directory already
       exists; both download targets are skipped in that case. -->
  <available type="dir" file="${build.dir}/${dict.version}" property="dict.available"/>

  <!-- Classpath for DictionaryBuilder: this module's compiled classes,
       the analyzers-common jar and the base classpath. -->
  <path id="classpath">
    <dirset dir="${build.dir}">
      <include name="classes/java"/>
    </dirset>
    <pathelement path="${analyzers-common.jar}"/>
    <path refid="base.classpath"/>
  </path>

  <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />

  <!-- Retrieves the ipadic-style archive (.tar.gz) through Ivy and unpacks it
       into ${build.dir}. Runs only when dict.is.ipadic is set and the
       dictionary directory is not already present. -->
  <target name="download-dict"
          depends="ivy-availability-check,ivy-fail,ivy-configure"
          unless="dict.available"
          if="dict.is.ipadic">
    <ivy:retrieve pattern="${build.dir}/${dict.src.file}" conf="${dict.type}" symlink="${ivy.symlink}"/>
    <!-- TODO: we should checksum too -->
    <gunzip src="${build.dir}/${dict.src.file}"/>
    <untar src="${build.dir}/${dict.version}.tar" dest="${build.dir}"/>
  </target>

  <!-- Retrieves the UniDic archive (.zip) through Ivy and unpacks it into
       ${build.dir}. Runs only when dict.is.unidic is set and the dictionary
       directory is not already present. -->
  <target name="download-dict-unidic"
          depends="ivy-availability-check,ivy-fail,ivy-configure"
          unless="dict.available"
          if="dict.is.unidic">
    <ivy:retrieve pattern="${build.dir}/${dict.src.file}" conf="${dict.type}" symlink="${ivy.symlink}"/>
    <!-- TODO: we should checksum too -->
    <unzip src="${build.dir}/${dict.src.file}" dest="${build.dir}"/>
  </target>

  <!-- Depends on both download targets so that whichever one is enabled runs
       first. The patch itself is applied only when dict.is.ipadic is set. -->
  <target name="patch-dict" depends="download-dict,download-dict-unidic" if="dict.is.ipadic">
    <patch patchfile="src/tools/patches/Noun.proper.csv.patch"
           originalfile="${dict.src.dir}/Noun.proper.csv"/>
  </target>

  <!-- Rebuilds the dictionary resources: removes the existing files under the
       dict resource directory, then runs DictionaryBuilder in a forked JVM. -->
  <target name="build-dict" depends="compile, patch-dict">
    <delete verbose="true">
      <fileset dir="${resources.dir}/org/apache/lucene/analysis/ja/dict" includes="**/*"/>
    </delete>
    <!-- TODO: optimize the dictionary construction a bit so that you don't need 1G -->
    <java fork="true" failonerror="true" maxmemory="1g"
          classname="org.apache.lucene.analysis.ja.util.DictionaryBuilder">
      <classpath refid="classpath"/>
      <assertions>
        <enable package="org.apache.lucene"/>
      </assertions>
      <!-- Argument order: format, source dir, target dir, encoding, normalize -->
      <arg value="${dict.format}"/>
      <arg value="${dict.src.dir}"/>
      <arg value="${dict.target.dir}"/>
      <arg value="${dict.encoding}"/>
      <arg value="${dict.normalize}"/>
    </java>
  </target>

  <target name="compile-test" depends="module-build.compile-test"/>

  <!-- Alias: regenerating this module means rebuilding the dictionary. -->
  <target name="regenerate" depends="build-dict"/>
</project>
$ ant clean build-dict | |
Buildfile: /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/build.xml | |
clean: | |
[delete] Deleting directory /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji | |
check-analyzers-common-uptodate: | |
jar-analyzers-common: | |
-check-git-state: | |
-git-cleanroot: | |
-copy-git-state: | |
git-autoclean: | |
ivy-availability-check: | |
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0. | |
-ivy-fail-disallowed-ivy-version: | |
ivy-fail: | |
ivy-configure: | |
[ivy:configure] :: Apache Ivy 2.4.0 - 20141213170938 :: http://ant.apache.org/ivy/ :: | |
[ivy:configure] :: loading settings :: file = /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/top-level-ivy-settings.xml | |
resolve: | |
common.init: | |
compile-lucene-core: | |
-check-git-state: | |
-git-cleanroot: | |
-copy-git-state: | |
git-autoclean: | |
ivy-availability-check: | |
[loadresource] Do not set property disallowed.ivy.jars.list as its length is 0. | |
-ivy-fail-disallowed-ivy-version: | |
ivy-fail: | |
ivy-configure: | |
[ivy:configure] :: loading settings :: file = /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/top-level-ivy-settings.xml | |
resolve: | |
init: | |
-clover.disable: | |
-clover.load: | |
-clover.classpath: | |
-clover.setup: | |
clover: | |
compile-core: | |
init: | |
-clover.disable: | |
-clover.load: | |
-clover.classpath: | |
-clover.setup: | |
clover: | |
common.compile-core: | |
[mkdir] Created dir: /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java | |
[javac] Compiling 48 source files to /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java | |
[javac] Creating empty /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java/org/apache/lucene/analysis/ja/dict/package-info.class | |
[javac] Creating empty /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java/org/apache/lucene/analysis/ja/package-info.class | |
[javac] Creating empty /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java/org/apache/lucene/analysis/ja/util/package-info.class | |
[javac] Creating empty /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java/org/apache/lucene/analysis/ja/tokenattributes/package-info.class | |
[copy] Copying 14 files to /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/classes/java | |
compile-core: | |
compile: | |
ivy-fail: | |
download-dict: | |
download-dict-unidic: | |
[ivy:retrieve] :: resolving dependencies :: org.apache.lucene#analyzers-kuromoji;working@junnoMacBook-Pro.local | |
[ivy:retrieve] confs: [unidic] | |
[ivy:retrieve] found mecab#mecab-unidic;2.1.2 in local | |
[ivy:retrieve] :: resolution report :: resolve 24ms :: artifacts dl 3ms | |
--------------------------------------------------------------------- | |
| | modules || artifacts | | |
| conf | number| search|dwnlded|evicted|| number|dwnlded| | |
--------------------------------------------------------------------- | |
| unidic | 1 | 0 | 0 | 0 || 1 | 0 | | |
--------------------------------------------------------------------- | |
[ivy:retrieve] :: retrieving :: org.apache.lucene#analyzers-kuromoji | |
[ivy:retrieve] confs: [unidic] | |
[ivy:retrieve] 1 artifacts copied, 0 already retrieved (137224kB/35ms) | |
[unzip] Expanding: /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji/unidic-mecab-2.1.2_src.zip into /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/build/analysis/kuromoji | |
patch-dict: | |
build-dict: | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/CharacterDefinition.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/ConnectionCosts.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$buffer.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$posDict.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$targetMap.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/UnknownDictionary$buffer.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/UnknownDictionary$posDict.dat | |
[delete] Deleting /Users/johtani/IdeaProjects/lucene-gosen-workspace/lucene-solr/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/UnknownDictionary$targetMap.dat | |
BUILD SUCCESSFUL | |
Total time: 29 seconds |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment