lawlesst/fuseki-fulltext-config.ttl

## readme.md

      
    Raw
  

              readme.md
            
          
    ## Fuseki minimal full text search
See the Jena text query docs for a full explanation.  This is a minimal sample config for setting up full text search for an existing Fuseki TDB.
### Indexing
Index an existing, loaded TDB with:
java -cp fuseki-server.jar jena.textindexer --desc=./fuseki-fulltext-config.ttl

### Sample query
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX afn: <http://jena.hpl.hp.com/ARQ/function#>
PREFIX text:    <http://jena.apache.org/text#> 
# NOTE(review): this namespace has no trailing '#' or '/', so hub:viaf expands to
# <http://library.brown.edu/ontology/hubviaf> -- confirm that is the intended predicate IRI.
PREFIX hub: <http://library.brown.edu/ontology/hub>
# NOTE(review): the schema: prefix is declared but not used in this query.
PREFIX schema: <http://schema.org/>


SELECT ?s ?label ?viaf ?auth
# text:query arguments: the property to search, the query string, and the maximum number of hits.
{ ?s text:query (rdfs:label 'mccauley' 25) ; 
     rdfs:label ?label ;
     # ?type is not projected, but this pattern still requires every match to have an rdf:type.
     rdf:type   ?type ;
     hub:viaf ?viaf .
  # Bind the local-name part of the subject IRI to ?auth.
  BIND(afn:localname(?s) as ?auth)
    
}


## fuseki-fulltext-config.ttl
@prefix :        <http://localhost/jena_example/#> .
@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
@prefix tdb:     <http://jena.hpl.hp.com/2008/tdb#> .
@prefix ja:      <http://jena.hpl.hp.com/2005/11/Assembler#> .
@prefix text:    <http://jena.apache.org/text#> .
@prefix fuseki:  <http://jena.apache.org/fuseki#> .

## Example of a TDB dataset and text index
## Initialize TDB
[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" .
tdb:DatasetTDB  rdfs:subClassOf  ja:RDFDataset .
tdb:GraphTDB    rdfs:subClassOf  ja:Model .

## Initialize text query
[] ja:loadClass       "org.apache.jena.query.text.TextQuery" .
# A TextDataset is a regular dataset with a text index.
text:TextDataset      rdfs:subClassOf   ja:RDFDataset .
# Lucene index
text:TextIndexLucene  rdfs:subClassOf   text:TextIndex .
# Solr index
text:TextIndexSolr    rdfs:subClassOf   text:TextIndex .

## ---------------------------------------------------------------
## This URI must be fixed - it's used to assemble the text dataset.

:text_dataset rdf:type     text:TextDataset ;
    text:dataset   <#dataset> ;
    text:index     <#indexLucene> ;
    .

# A TDB dataset used for RDF storage
<#dataset> rdf:type      tdb:DatasetTDB ;
    # Directory that holds the TDB database files.
    tdb:location "authority-db" ;
    tdb:unionDefaultGraph true ; # Optional: the default graph is the union of the named graphs
    .

# Text index description
<#indexLucene> a text:TextIndexLucene ;
    text:directory <file:Lucene> ;
    ##text:directory "mem" ;
    text:entityMap <#entMap> ;
    .

# Mapping in the index
# URI stored in field "uri"
# rdfs:label is mapped to field "text"
<#entMap> a text:EntityMap ;
    text:entityField      "uri" ;
    text:defaultField     "text" ;
    text:map (
         [ text:field "text" ; text:predicate rdfs:label ]
         ) .

[] rdf:type fuseki:Server ;
   # Server-wide context parameters can be given here.
   # For example, to set query timeouts on a server-wide basis:
   # Format 1: "1000" -- 1 second timeout
   # Format 2: "10000,60000" -- 10s timeout to first result, then 60s timeout for the rest of the query.
   # See the Javadoc for ARQ.queryTimeout
   # ja:context [ ja:cxtName "arq:queryTimeout" ;  ja:cxtValue "10000" ] ;

   # Load custom code (rarely needed)
   # ja:loadClass "your.code.Class" ;

   # Services available.  Only explicitly listed services are configured.
   #  If there is a service description not linked from this list, it is ignored.
   fuseki:services (
     <#service>
     #<#service_text_tdb>
   ) .

<#service>  rdf:type fuseki:Service ;
    fuseki:name              "authority" ;       # http://host:port/authority
    fuseki:serviceQuery               "query" ;    # SPARQL query service
    fuseki:serviceQuery               "sparql" ;   # SPARQL query service (second endpoint name)
    fuseki:serviceUpdate              "update" ;   # SPARQL update service
    fuseki:serviceUpload              "upload" ;   # Non-SPARQL upload service
    fuseki:serviceReadWriteGraphStore "data" ;     # SPARQL Graph store protocol (read and write)
    # Serve the text-indexed dataset; the commented-out line would expose the
    # plain TDB dataset without the text index.
    #fuseki:dataset           <#dataset> ;
    fuseki:dataset                  :text_dataset ;
.
	@prefix : <http://localhost/jena_example/#> .
	@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
	@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
	@prefix tdb: <http://jena.hpl.hp.com/2008/tdb#> .
	@prefix ja: <http://jena.hpl.hp.com/2005/11/Assembler#> .
	@prefix text: <http://jena.apache.org/text#> .
	@prefix fuseki: <http://jena.apache.org/fuseki#> .

	## Example of a TDB dataset and text index
	## Initialize TDB
	[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" .
	tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset .
	tdb:GraphTDB rdfs:subClassOf ja:Model .

	## Initialize text query
	[] ja:loadClass "org.apache.jena.query.text.TextQuery" .
	# A TextDataset is a regular dataset with a text index.
	text:TextDataset rdfs:subClassOf ja:RDFDataset .
	# Lucene index
	text:TextIndexLucene rdfs:subClassOf text:TextIndex .
	# Solr index
	text:TextIndexSolr rdfs:subClassOf text:TextIndex .

	## ---------------------------------------------------------------
	## This URI must be fixed - it's used to assemble the text dataset.

	:text_dataset rdf:type text:TextDataset ;
	text:dataset <#dataset> ;
	text:index <#indexLucene> ;
	.

	# A TDB dataset used for RDF storage
	<#dataset> rdf:type tdb:DatasetTDB ;
	# Directory that holds the TDB database files.
	tdb:location "authority-db" ;
	tdb:unionDefaultGraph true ; # Optional: the default graph is the union of the named graphs
	.

	# Text index description
	<#indexLucene> a text:TextIndexLucene ;
	text:directory <file:Lucene> ;
	##text:directory "mem" ;
	text:entityMap <#entMap> ;
	.

	# Mapping in the index
	# URI stored in field "uri"
	# rdfs:label is mapped to field "text"
	<#entMap> a text:EntityMap ;
	text:entityField "uri" ;
	text:defaultField "text" ;
	text:map (
	[ text:field "text" ; text:predicate rdfs:label ]
	) .

	[] rdf:type fuseki:Server ;
	# Server-wide context parameters can be given here.
	# For example, to set query timeouts on a server-wide basis:
	# Format 1: "1000" -- 1 second timeout
	# Format 2: "10000,60000" -- 10s timeout to first result, then 60s timeout for the rest of the query.
	# See the Javadoc for ARQ.queryTimeout
	# ja:context [ ja:cxtName "arq:queryTimeout" ; ja:cxtValue "10000" ] ;

	# Load custom code (rarely needed)
	# ja:loadClass "your.code.Class" ;

	# Services available. Only explicitly listed services are configured.
	# If there is a service description not linked from this list, it is ignored.
	fuseki:services (
	<#service>
	#<#service_text_tdb>
	) .

	<#service> rdf:type fuseki:Service ;
	fuseki:name "authority" ; # http://host:port/authority
	fuseki:serviceQuery "query" ; # SPARQL query service
	fuseki:serviceQuery "sparql" ; # SPARQL query service (second endpoint name)
	fuseki:serviceUpdate "update" ; # SPARQL update service
	fuseki:serviceUpload "upload" ; # Non-SPARQL upload service
	fuseki:serviceReadWriteGraphStore "data" ; # SPARQL Graph store protocol (read and write)
	# Serve the text-indexed dataset; the commented-out line would expose the
	# plain TDB dataset without the text index.
	#fuseki:dataset <#dataset> ;
	fuseki:dataset :text_dataset ;
	.