Created December 20, 2012 08:49
-
-
Save fumi/4343891 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff -ru dbpedia-spotlight/bin/download.sh dbpedia-spotlight.ja/bin/download.sh | |
--- dbpedia-spotlight/bin/download.sh 2012-12-19 16:08:37.631162096 +0900 | |
+++ dbpedia-spotlight.ja/bin/download.sh 2012-12-20 15:23:29.734663244 +0900 | |
@@ -6,9 +6,9 @@ | |
PROGNAME=$(basename $0) | |
#Config parameters (adjust according your target language and folder) | |
-export lang_i18n=pt | |
-export language=portuguese | |
-export dbpedia_workspace=/var/local/spotlight | |
+export lang_i18n=ja | |
+export language=japanese | |
+export dbpedia_workspace=/home/fumi/spotlight | |
export dbpedia_version=3.8 | |
# error_exit function by William Shotts. http://stackoverflow.com/questions/64786/error-handling-in-bash | |
diff -ru dbpedia-spotlight/bin/index.sh dbpedia-spotlight.ja/bin/index.sh | |
--- dbpedia-spotlight/bin/index.sh 2012-12-19 16:08:37.631162096 +0900 | |
+++ dbpedia-spotlight.ja/bin/index.sh 2012-12-20 15:34:52.438701421 +0900 | |
@@ -5,7 +5,8 @@ | |
# | |
# @author maxjakob, pablomendes | |
-export DBPEDIA_WORKSPACE=/usr/local/spotlight/dbpedia_data | |
+#export DBPEDIA_WORKSPACE=/usr/local/spotlight/dbpedia_data | |
+export DBPEDIA_WORKSPACE=/home/fumi/spotlight/dbpedia_data | |
export INDEX_CONFIG_FILE=../conf/indexing.properties | |
diff -ru dbpedia-spotlight/conf/indexing.properties dbpedia-spotlight.ja/conf/indexing.properties | |
--- dbpedia-spotlight/conf/indexing.properties 2012-12-19 16:08:37.631162096 +0900 | |
+++ dbpedia-spotlight.ja/conf/indexing.properties 2012-12-20 17:08:05.731014448 +0900 | |
@@ -1,45 +1,47 @@ | |
# Wikipedia Dump | |
# -------------- | |
-org.dbpedia.spotlight.data.wikipediaDump = /usr/local/spotlight/dbpedia_data/original/wikipedia/en/enwiki-latest-pages-articles.xml.bz2 | |
+org.dbpedia.spotlight.data.wikipediaDump = /home/fumi/spotlight/dbpedia_data/original/wikipedia/ja/jawiki-latest-pages-articles.xml.bz2 | |
# Location for DBpedia resources index (output | |
-org.dbpedia.spotlight.index.dir =/usr/local/spotlight/dbpedia_data/data/output/index | |
+org.dbpedia.spotlight.index.dir =/home/fumi/spotlight/dbpedia_data/data/output/index | |
org.dbpedia.spotlight.index.minDocsBeforeFlush = 40000 | |
# DBpedia Datasets | |
# ---------------- | |
-org.dbpedia.spotlight.data.labels =/usr/local/spotlight/dbpedia_data/original/dbpedia/en/labels_en.nt.bz2 | |
-org.dbpedia.spotlight.data.redirects = /usr/local/spotlight/dbpedia_data/original/dbpedia/en/redirects_en.nt.bz2 | |
-org.dbpedia.spotlight.data.disambiguations = /usr/local/spotlight/dbpedia_data/original/dbpedia/en/disambiguations_en.nt.bz2 | |
-org.dbpedia.spotlight.data.instanceTypes = //usr/local/spotlight/dbpedia_data/original/dbpedia/en/instance_types_en.nt.bz2 | |
+org.dbpedia.spotlight.data.labels =/home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/labels_ja.nt.bz2 | |
+org.dbpedia.spotlight.data.redirects = /home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/redirects_ja.nt.bz2 | |
+org.dbpedia.spotlight.data.disambiguations = /home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/disambiguations_ja.nt.bz2 | |
+org.dbpedia.spotlight.data.instanceTypes = //home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/instance_types_ja.nt.bz2 | |
# Files created from DBpedia Datasets | |
# ----------------------- | |
-org.dbpedia.spotlight.data.conceptURIs = /usr/local/spotlight/dbpedia_data/data/output/conceptURIs.list | |
-org.dbpedia.spotlight.data.redirectsTC = /usr/local/spotlight/dbpedia_data/data/output/redirects_tc.tsv | |
-org.dbpedia.spotlight.data.surfaceForms = /usr/local/spotlight/dbpedia_data/data/output/surfaceForms.tsv | |
+org.dbpedia.spotlight.data.conceptURIs = /home/fumi/spotlight/dbpedia_data/data/output/conceptURIs.list | |
+org.dbpedia.spotlight.data.redirectsTC = /home/fumi/spotlight/dbpedia_data/data/output/redirects_tc.tsv | |
+org.dbpedia.spotlight.data.surfaceForms = /home/fumi/spotlight/dbpedia_data/data/output/surfaceForms.tsv | |
# Language-specific config | |
# -------------- | |
-org.dbpedia.spotlight.language = English | |
-org.dbpedia.spotlight.language_i18n_code = en | |
-org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.en.EnglishAnalyzer | |
+org.dbpedia.spotlight.language = Japanese | |
+org.dbpedia.spotlight.language_i18n_code = ja | |
+org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.ja.JapaneseAnalyzer | |
org.dbpedia.spotlight.lucene.version = LUCENE_36 | |
# Internationalization (i18n) support -- work in progress | |
-org.dbpedia.spotlight.default_namespace = http://dbpedia.org/resource/ | |
+org.dbpedia.spotlight.default_namespace = http://ja.dbpedia.org/resource/ | |
org.dbpedia.spotlight.default_ontology= http://dbpedia.org/ontology/ | |
# Stop word list | |
-org.dbpedia.spotlight.data.stopWords.english = /data/spotlight/3.6/en/stopwords.en.list | |
-org.dbpedia.spotlight.data.stopWords.spanish = /data/spotlight/3.6/es/stopwords.es.list | |
-org.dbpedia.spotlight.data.stopWords.portuguese = /usr/local/spotlight/dbpedia_data/data/stopwords.pt.list | |
+#org.dbpedia.spotlight.data.stopWords.english = /home/fumi/spotlight/dbpedia_data/data/stopwords.en.list | |
+org.dbpedia.spotlight.data.stopWords.japanese = /home/fumi/spotlight/dbpedia_data/data/stopwords.ja.list | |
+#org.dbpedia.spotlight.data.stopWords.spanish = /data/spotlight/3.6/es/stopwords.es.list | |
+#org.dbpedia.spotlight.data.stopWords.portuguese = /home/fumi/spotlight/dbpedia_data/data/stopwords.pt.list | |
# URI patterns that should not be indexed. e.g. List_of_* | |
-org.dbpedia.spotlight.data.badURIs.english = /data/spotlight/blacklistedURIPatterns.en.list | |
-org.dbpedia.spotlight.data.badURIs.portuguese=/usr/local/spotlight/dbpedia_data/data/blacklistedURIPatterns.pt.list | |
-org.dbpedia.spotlight.data.badURI.spanish=/usr/local/spotlight/dbpedia_data/data/blacklistedURIPatterns.es.list | |
+#org.dbpedia.spotlight.data.badURIs.english = /data/spotlight/blacklistedURIPatterns.en.list | |
+org.dbpedia.spotlight.data.badURIs.japanese = /home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.ja.list | |
+#org.dbpedia.spotlight.data.badURIs.portuguese=/home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.pt.list | |
+#org.dbpedia.spotlight.data.badURI.spanish=/home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.es.list | |
# Will discard surface forms that are too long (reduces complexity of spotting and generally size in disk/memory) | |
org.dbpedia.spotlight.data.maxSurfaceFormLength = 50 | |
@@ -48,7 +50,7 @@ | |
org.dbpedia.spotlight.data.minContextWindowSize = 0 | |
# Other files | |
-org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts | |
+#org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts | |
# Yahoo! Boss properties | |
# ---------------------- | |
@@ -60,5 +62,5 @@ | |
org.dbpedia.spotlight.yahoo.maxIterations = 100 | |
## important for Yahoo! Boss query string: both language and region must be set according to | |
## http://developer.yahoo.com/search/boss/boss_guide/supp_regions_lang.html | |
-org.dbpedia.spotlight.yahoo.language = en | |
-org.dbpedia.spotlight.yahoo.region = us | |
+org.dbpedia.spotlight.yahoo.language = ja | |
+org.dbpedia.spotlight.yahoo.region = jp | |
Only in dbpedia-spotlight.ja/conf: indexing.properties.en | |
diff -ru dbpedia-spotlight/conf/server.properties dbpedia-spotlight.ja/conf/server.properties | |
--- dbpedia-spotlight/conf/server.properties 2012-12-20 15:25:20.926669430 +0900 | |
+++ dbpedia-spotlight.ja/conf/server.properties 2012-12-20 15:04:42.426599506 +0900 | |
@@ -110,7 +110,7 @@ | |
#----- LINKING / FILTERING ------- | |
# Configuration for SparqlFilter | |
-org.dbpedia.spotlight.sparql.endpoint = http://dbpedia.org/sparql | |
-org.dbpedia.spotlight.sparql.graph = http://dbpedia.org | |
-#org.dbpedia.spotlight.sparql.endpoint = http://lod.ac/species/sparql | |
-#org.dbpedia.spotlight.sparql.graph = | |
+#org.dbpedia.spotlight.sparql.endpoint = http://dbpedia.org/sparql | |
+#org.dbpedia.spotlight.sparql.graph = http://dbpedia.org | |
+org.dbpedia.spotlight.sparql.endpoint = http://lod.ac/species/sparql | |
+org.dbpedia.spotlight.sparql.graph = | |
Only in dbpedia-spotlight.ja/core/target: classes.2125373288.timestamp | |
Binary files dbpedia-spotlight/core/target/core-0.6.jar and dbpedia-spotlight.ja/core/target/core-0.6.jar differ | |
Only in dbpedia-spotlight.ja/eval/target: classes.-411422520.timestamp | |
Binary files dbpedia-spotlight/eval/target/eval-0.6.jar and dbpedia-spotlight.ja/eval/target/eval-0.6.jar differ | |
diff -ru dbpedia-spotlight/index/pom.xml dbpedia-spotlight.ja/index/pom.xml | |
--- dbpedia-spotlight/index/pom.xml 2012-12-19 16:08:38.151162125 +0900 | |
+++ dbpedia-spotlight.ja/index/pom.xml 2012-12-20 17:30:00.831088085 +0900 | |
@@ -213,6 +213,11 @@ | |
</dependency> | |
<dependency> | |
<groupId>org.apache.lucene</groupId> | |
+ <artifactId>lucene-kuromoji</artifactId> | |
+ <version>3.6.0</version> | |
+ </dependency> | |
+ <dependency> | |
+ <groupId>org.apache.lucene</groupId> | |
<artifactId>lucene-misc</artifactId> | |
<version>3.6.0</version> | |
</dependency> | |
Only in dbpedia-spotlight.ja/index/target: classes.969761386.timestamp | |
Binary files dbpedia-spotlight/index/target/index-0.6.jar and dbpedia-spotlight.ja/index/target/index-0.6.jar differ | |
diff -ru dbpedia-spotlight/pom.xml dbpedia-spotlight.ja/pom.xml | |
--- dbpedia-spotlight/pom.xml 2012-12-19 16:08:38.347162136 +0900 | |
+++ dbpedia-spotlight.ja/pom.xml 2012-12-20 16:33:58.146899874 +0900 | |
@@ -320,6 +320,11 @@ | |
</dependency> | |
<dependency> | |
<groupId>org.apache.lucene</groupId> | |
+ <artifactId>lucene-kuromoji</artifactId> | |
+ <version>3.6.0</version> | |
+ </dependency> | |
+ <dependency> | |
+ <groupId>org.apache.lucene</groupId> | |
<artifactId>lucene-phonetic</artifactId> | |
<version>3.6.0</version> | |
</dependency> | |
Only in dbpedia-spotlight/rest: log | |
Only in dbpedia-spotlight/rest: scala:run | |
Only in dbpedia-spotlight.ja/rest/target: classes.81715656.timestamp | |
Binary files dbpedia-spotlight/rest/target/rest-0.6.jar and dbpedia-spotlight.ja/rest/target/rest-0.6.jar differ |
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.