Skip to content

Instantly share code, notes, and snippets.

@fumi
Created December 20, 2012 08:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fumi/4343891 to your computer and use it in GitHub Desktop.
Save fumi/4343891 to your computer and use it in GitHub Desktop.
diff -ru dbpedia-spotlight/bin/download.sh dbpedia-spotlight.ja/bin/download.sh
--- dbpedia-spotlight/bin/download.sh 2012-12-19 16:08:37.631162096 +0900
+++ dbpedia-spotlight.ja/bin/download.sh 2012-12-20 15:23:29.734663244 +0900
@@ -6,9 +6,9 @@
PROGNAME=$(basename $0)
#Config parameters (adjust according your target language and folder)
-export lang_i18n=pt
-export language=portuguese
-export dbpedia_workspace=/var/local/spotlight
+export lang_i18n=ja
+export language=japanese
+export dbpedia_workspace=/home/fumi/spotlight
export dbpedia_version=3.8
# error_exit function by William Shotts. http://stackoverflow.com/questions/64786/error-handling-in-bash
diff -ru dbpedia-spotlight/bin/index.sh dbpedia-spotlight.ja/bin/index.sh
--- dbpedia-spotlight/bin/index.sh 2012-12-19 16:08:37.631162096 +0900
+++ dbpedia-spotlight.ja/bin/index.sh 2012-12-20 15:34:52.438701421 +0900
@@ -5,7 +5,8 @@
#
# @author maxjakob, pablomendes
-export DBPEDIA_WORKSPACE=/usr/local/spotlight/dbpedia_data
+#export DBPEDIA_WORKSPACE=/usr/local/spotlight/dbpedia_data
+export DBPEDIA_WORKSPACE=/home/fumi/spotlight/dbpedia_data
export INDEX_CONFIG_FILE=../conf/indexing.properties
diff -ru dbpedia-spotlight/conf/indexing.properties dbpedia-spotlight.ja/conf/indexing.properties
--- dbpedia-spotlight/conf/indexing.properties 2012-12-19 16:08:37.631162096 +0900
+++ dbpedia-spotlight.ja/conf/indexing.properties 2012-12-20 17:08:05.731014448 +0900
@@ -1,45 +1,47 @@
# Wikipedia Dump
# --------------
-org.dbpedia.spotlight.data.wikipediaDump = /usr/local/spotlight/dbpedia_data/original/wikipedia/en/enwiki-latest-pages-articles.xml.bz2
+org.dbpedia.spotlight.data.wikipediaDump = /home/fumi/spotlight/dbpedia_data/original/wikipedia/ja/jawiki-latest-pages-articles.xml.bz2
# Location for DBpedia resources index (output
-org.dbpedia.spotlight.index.dir =/usr/local/spotlight/dbpedia_data/data/output/index
+org.dbpedia.spotlight.index.dir =/home/fumi/spotlight/dbpedia_data/data/output/index
org.dbpedia.spotlight.index.minDocsBeforeFlush = 40000
# DBpedia Datasets
# ----------------
-org.dbpedia.spotlight.data.labels =/usr/local/spotlight/dbpedia_data/original/dbpedia/en/labels_en.nt.bz2
-org.dbpedia.spotlight.data.redirects = /usr/local/spotlight/dbpedia_data/original/dbpedia/en/redirects_en.nt.bz2
-org.dbpedia.spotlight.data.disambiguations = /usr/local/spotlight/dbpedia_data/original/dbpedia/en/disambiguations_en.nt.bz2
-org.dbpedia.spotlight.data.instanceTypes = //usr/local/spotlight/dbpedia_data/original/dbpedia/en/instance_types_en.nt.bz2
+org.dbpedia.spotlight.data.labels =/home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/labels_ja.nt.bz2
+org.dbpedia.spotlight.data.redirects = /home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/redirects_ja.nt.bz2
+org.dbpedia.spotlight.data.disambiguations = /home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/disambiguations_ja.nt.bz2
+org.dbpedia.spotlight.data.instanceTypes = //home/fumi/spotlight/dbpedia_data/original/dbpedia/ja/instance_types_ja.nt.bz2
# Files created from DBpedia Datasets
# -----------------------
-org.dbpedia.spotlight.data.conceptURIs = /usr/local/spotlight/dbpedia_data/data/output/conceptURIs.list
-org.dbpedia.spotlight.data.redirectsTC = /usr/local/spotlight/dbpedia_data/data/output/redirects_tc.tsv
-org.dbpedia.spotlight.data.surfaceForms = /usr/local/spotlight/dbpedia_data/data/output/surfaceForms.tsv
+org.dbpedia.spotlight.data.conceptURIs = /home/fumi/spotlight/dbpedia_data/data/output/conceptURIs.list
+org.dbpedia.spotlight.data.redirectsTC = /home/fumi/spotlight/dbpedia_data/data/output/redirects_tc.tsv
+org.dbpedia.spotlight.data.surfaceForms = /home/fumi/spotlight/dbpedia_data/data/output/surfaceForms.tsv
# Language-specific config
# --------------
-org.dbpedia.spotlight.language = English
-org.dbpedia.spotlight.language_i18n_code = en
-org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.en.EnglishAnalyzer
+org.dbpedia.spotlight.language = Japanese
+org.dbpedia.spotlight.language_i18n_code = ja
+org.dbpedia.spotlight.lucene.analyzer = org.apache.lucene.analysis.ja.JapaneseAnalyzer
org.dbpedia.spotlight.lucene.version = LUCENE_36
# Internationalization (i18n) support -- work in progress
-org.dbpedia.spotlight.default_namespace = http://dbpedia.org/resource/
+org.dbpedia.spotlight.default_namespace = http://ja.dbpedia.org/resource/
org.dbpedia.spotlight.default_ontology= http://dbpedia.org/ontology/
# Stop word list
-org.dbpedia.spotlight.data.stopWords.english = /data/spotlight/3.6/en/stopwords.en.list
-org.dbpedia.spotlight.data.stopWords.spanish = /data/spotlight/3.6/es/stopwords.es.list
-org.dbpedia.spotlight.data.stopWords.portuguese = /usr/local/spotlight/dbpedia_data/data/stopwords.pt.list
+#org.dbpedia.spotlight.data.stopWords.english = /home/fumi/spotlight/dbpedia_data/data/stopwords.en.list
+org.dbpedia.spotlight.data.stopWords.japanese = /home/fumi/spotlight/dbpedia_data/data/stopwords.ja.list
+#org.dbpedia.spotlight.data.stopWords.spanish = /data/spotlight/3.6/es/stopwords.es.list
+#org.dbpedia.spotlight.data.stopWords.portuguese = /home/fumi/spotlight/dbpedia_data/data/stopwords.pt.list
# URI patterns that should not be indexed. e.g. List_of_*
-org.dbpedia.spotlight.data.badURIs.english = /data/spotlight/blacklistedURIPatterns.en.list
-org.dbpedia.spotlight.data.badURIs.portuguese=/usr/local/spotlight/dbpedia_data/data/blacklistedURIPatterns.pt.list
-org.dbpedia.spotlight.data.badURI.spanish=/usr/local/spotlight/dbpedia_data/data/blacklistedURIPatterns.es.list
+#org.dbpedia.spotlight.data.badURIs.english = /data/spotlight/blacklistedURIPatterns.en.list
+org.dbpedia.spotlight.data.badURIs.japanese = /home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.ja.list
+#org.dbpedia.spotlight.data.badURIs.portuguese=/home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.pt.list
+#org.dbpedia.spotlight.data.badURI.spanish=/home/fumi/spotlight/dbpedia_data/data/blacklistedURIPatterns.es.list
# Will discard surface forms that are too long (reduces complexity of spotting and generally size in disk/memory)
org.dbpedia.spotlight.data.maxSurfaceFormLength = 50
@@ -48,7 +50,7 @@
org.dbpedia.spotlight.data.minContextWindowSize = 0
# Other files
-org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts
+#org.dbpedia.spotlight.data.priors = /home/pablo/eval/grounder/gold/g1b_spotlight.words.uris.counts
# Yahoo! Boss properties
# ----------------------
@@ -60,5 +62,5 @@
org.dbpedia.spotlight.yahoo.maxIterations = 100
## important for Yahoo! Boss query string: both language and region must be set according to
## http://developer.yahoo.com/search/boss/boss_guide/supp_regions_lang.html
-org.dbpedia.spotlight.yahoo.language = en
-org.dbpedia.spotlight.yahoo.region = us
+org.dbpedia.spotlight.yahoo.language = ja
+org.dbpedia.spotlight.yahoo.region = jp
Only in dbpedia-spotlight.ja/conf: indexing.properties.en
diff -ru dbpedia-spotlight/conf/server.properties dbpedia-spotlight.ja/conf/server.properties
--- dbpedia-spotlight/conf/server.properties 2012-12-20 15:25:20.926669430 +0900
+++ dbpedia-spotlight.ja/conf/server.properties 2012-12-20 15:04:42.426599506 +0900
@@ -110,7 +110,7 @@
#----- LINKING / FILTERING -------
# Configuration for SparqlFilter
-org.dbpedia.spotlight.sparql.endpoint = http://dbpedia.org/sparql
-org.dbpedia.spotlight.sparql.graph = http://dbpedia.org
-#org.dbpedia.spotlight.sparql.endpoint = http://lod.ac/species/sparql
-#org.dbpedia.spotlight.sparql.graph =
+#org.dbpedia.spotlight.sparql.endpoint = http://dbpedia.org/sparql
+#org.dbpedia.spotlight.sparql.graph = http://dbpedia.org
+org.dbpedia.spotlight.sparql.endpoint = http://lod.ac/species/sparql
+org.dbpedia.spotlight.sparql.graph =
Only in dbpedia-spotlight.ja/core/target: classes.2125373288.timestamp
Binary files dbpedia-spotlight/core/target/core-0.6.jar and dbpedia-spotlight.ja/core/target/core-0.6.jar differ
Only in dbpedia-spotlight.ja/eval/target: classes.-411422520.timestamp
Binary files dbpedia-spotlight/eval/target/eval-0.6.jar and dbpedia-spotlight.ja/eval/target/eval-0.6.jar differ
diff -ru dbpedia-spotlight/index/pom.xml dbpedia-spotlight.ja/index/pom.xml
--- dbpedia-spotlight/index/pom.xml 2012-12-19 16:08:38.151162125 +0900
+++ dbpedia-spotlight.ja/index/pom.xml 2012-12-20 17:30:00.831088085 +0900
@@ -213,6 +213,11 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-kuromoji</artifactId>
+ <version>3.6.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
<artifactId>lucene-misc</artifactId>
<version>3.6.0</version>
</dependency>
Only in dbpedia-spotlight.ja/index/target: classes.969761386.timestamp
Binary files dbpedia-spotlight/index/target/index-0.6.jar and dbpedia-spotlight.ja/index/target/index-0.6.jar differ
diff -ru dbpedia-spotlight/pom.xml dbpedia-spotlight.ja/pom.xml
--- dbpedia-spotlight/pom.xml 2012-12-19 16:08:38.347162136 +0900
+++ dbpedia-spotlight.ja/pom.xml 2012-12-20 16:33:58.146899874 +0900
@@ -320,6 +320,11 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-kuromoji</artifactId>
+ <version>3.6.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
<artifactId>lucene-phonetic</artifactId>
<version>3.6.0</version>
</dependency>
Only in dbpedia-spotlight/rest: log
Only in dbpedia-spotlight/rest: scala:run
Only in dbpedia-spotlight.ja/rest/target: classes.81715656.timestamp
Binary files dbpedia-spotlight/rest/target/rest-0.6.jar and dbpedia-spotlight.ja/rest/target/rest-0.6.jar differ
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment