Skip to content

Instantly share code, notes, and snippets.

@evren
Last active October 2, 2019 08:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save evren/7601614 to your computer and use it in GitHub Desktop.
Save evren/7601614 to your computer and use it in GitHub Desktop.
Showing examples of efficiently loading multiple files into different named graphs using Stardog
// Copyright (c) 2013 -- Clark & Parsia, LLC. <http://www.clarkparsia.com>
// For more information about licensing and copyright of this software, please contact inquiries@clarkparsia.com.
package com.complexible.stardog;
import java.io.File;
import java.io.PrintStream;
import java.nio.file.Path;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import com.complexible.common.rdf.rio.RDFCompressedFormat;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import com.complexible.common.rdf.model.Values;
import com.complexible.stardog.api.Connection;
import com.complexible.stardog.api.admin.AdminConnection;
import com.complexible.stardog.api.admin.AdminConnectionConfiguration;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Maps;
import com.google.common.io.ByteStreams;
/**
* Creates a Stardog DB with multiple files where each file is loaded into a different named graph. Named graph URI
* is computed by concatenating a given namespace URI with the name of the file. Different named graph templates can
* be used by tweaking the named graph function below.
*
* @author Evren Sirin
*/
public class CreateDBWithNamedGraphs {
    /**
     * Creates a database from the RDF files found in a directory, loading each file into its own named
     * graph, and then prints the number of loaded triples and named graphs together with the elapsed time.
     *
     * @param args exactly three values: the database name, the input directory and the namespace that is
     *             prepended to each file name to build the named graph IRI
     * @throws Exception if connecting to the server, creating the database or querying it fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.println("usage: " + CreateDBWithNamedGraphs.class.getName() + " database inputDir namespace");
            // wrong usage is a failure, so signal it to the calling shell with a non-zero status
            System.exit(1);
            return;
        }

        final String database = args[0];
        final String inputDir = args[1];
        final String namespace = args[2];

        // listFiles() returns null when the path is not a directory or cannot be read; check this
        // before opening any connection so there is nothing to clean up on this error path
        final File[] files = new File(inputDir).listFiles();
        if (files == null) {
            System.err.println("Cannot list files in input directory: " + inputDir);
            System.exit(1);
            return;
        }

        // create admin connection
        AdminConnection admin = AdminConnectionConfiguration
            .toServer("http://localhost:5820")
            .credentials("admin", "admin")
            .connect();
        try {
            System.out.print("Creating...");

            // let's time this
            Stopwatch w = Stopwatch.createStarted();

            // create a mapping from the input files to named graphs
            Map<Path, Resource> paths = Maps.newHashMap();
            for (File file : files) {
                // ignore non-RDF files
                if (RDFCompressedFormat.forFileName(file.getName()) == null) {
                    continue;
                }

                // named graph is concatenation of namespace and file name
                Resource namedGraph = Values.iri(namespace, file.getName());
                paths.put(file.toPath(), namedGraph);
            }

            // create db with all the files in the directory and connect to the db at the end;
            // the progress reporter output is discarded by writing it to a null stream
            Connection conn = admin
                .disk(database)
                .reporter(new PrintStream(ByteStreams.nullOutputStream()))
                .create(path -> paths.get(path), paths.keySet().toArray(new Path[0]))
                .connect();
            try {
                w.stop();

                // get the number of triples and named graphs
                long size = conn.size();
                long contexts = ((Literal) conn
                    .select("select (count(distinct ?g) as ?count) where {graph ?g {?s ?p ?o}}")
                    .execute()
                    .next()
                    .getValue("count"))
                    .longValue();

                // triples per millisecond equals thousands of triples per second; clamp the divisor
                // so a sub-millisecond run does not print Infinity or NaN
                long elapsedMillis = Math.max(1, w.elapsed(TimeUnit.MILLISECONDS));
                System.out.format("finished. Loaded %d triples into %d graphs in %s (%1.1fK triples/sec)%n", size, contexts, w,
                    (float) size / elapsedMillis);
            } finally {
                // close the database connection even when the size or count query fails
                conn.close();
            }
        } finally {
            // close the admin connection even when database creation fails
            admin.close();
        }
    }
}
// Copyright (c) 2013 -- Clark & Parsia, LLC. <http://www.clarkparsia.com>
// For more information about licensing and copyright of this software, please contact inquiries@clarkparsia.com.
package com.complexible.stardog;
import java.io.File;
import java.util.concurrent.TimeUnit;
import com.complexible.common.rdf.rio.RDFCompressedFormat;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.rio.RDFFormat;
import com.complexible.common.rdf.model.Values;
import com.complexible.stardog.api.Connection;
import com.complexible.stardog.api.ConnectionConfiguration;
import com.complexible.stardog.api.IO;
import com.google.common.base.Stopwatch;
/**
* Loads multiple files to a Stardog DB where each file is added into a different named graph. Named graph URI
* is computed by concatenating a given namespace URI with the name of the file. Different named graph templates can
* be used by tweaking the named graph function below.
*
* @author Evren Sirin
*/
public class LoadFilesToNamedGraphs {
    /**
     * Adds the RDF files found in a directory to an existing database within a single transaction, each
     * file going into its own named graph, and then prints the number of triples and named graphs in the
     * database together with the elapsed time.
     *
     * @param args exactly three values: the database name, the input directory and the namespace that is
     *             prepended to each file name to build the named graph IRI
     * @throws Exception if connecting to the database, adding the files or querying the database fails
     */
    public static void main(String[] args) throws Exception {
        if (args.length != 3) {
            System.err.println("usage: " + LoadFilesToNamedGraphs.class.getName() + " database inputDir namespace");
            // wrong usage is a failure, so signal it to the calling shell with a non-zero status
            System.exit(1);
            return;
        }

        final String database = args[0];
        final String inputDir = args[1];
        final String namespace = args[2];

        // listFiles() returns null when the path is not a directory or cannot be read; check this
        // before opening the connection so there is nothing to clean up on this error path
        final File[] files = new File(inputDir).listFiles();
        if (files == null) {
            System.err.println("Cannot list files in input directory: " + inputDir);
            System.exit(1);
            return;
        }

        // connect to the database the files will be added to
        Connection conn = ConnectionConfiguration
            .to(database)
            .server("http://localhost:5820")
            .credentials("admin", "admin")
            .connect();
        try {
            System.out.print("Loading...");

            Stopwatch w = Stopwatch.createStarted();

            // start tx
            conn.begin();

            // we'll add multiple files and files are on the server side so no need to send them through the
            // connection and server can read them from the file system directly
            IO io = conn.add().io().serverSide();

            // add all the files
            for (File file : files) {
                RDFFormat format = RDFCompressedFormat.forFileName(file.getName());

                // ignore non-RDF files
                if (format == null) {
                    continue;
                }

                // named graph is concatenation of namespace and file name
                Resource namedGraph = Values.iri(namespace, file.getName());

                // it is important to send absolute paths since the working dir for client may not be same
                // as the working dir for the server
                io.format(format).context(namedGraph).file(file.getAbsoluteFile().toPath());
            }

            // commit tx
            conn.commit();

            w.stop();

            // get the number of triples and named graphs
            long size = conn.size();
            long contexts = ((Literal) conn
                .select("select (count(distinct ?g) as ?count) where {graph ?g {?s ?p ?o}}")
                .execute()
                .next()
                .getValue("count"))
                .longValue();

            // triples per millisecond equals thousands of triples per second; clamp the divisor
            // so a sub-millisecond run does not print Infinity or NaN
            long elapsedMillis = Math.max(1, w.elapsed(TimeUnit.MILLISECONDS));
            System.out.format("finished. Loaded %d triples into %d contexts in %s (%1.1fK triples/sec)%n", size, contexts, w,
                (float) size / elapsedMillis);
        } finally {
            // close the connection even when loading, committing or querying fails
            conn.close();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment