Skip to content

Instantly share code, notes, and snippets.

@nawroth
Created February 15, 2013 09:42
Show Gist options
  • Save nawroth/4959405 to your computer and use it in GitHub Desktop.
Save nawroth/4959405 to your computer and use it in GitHub Desktop.
Importing DBpedia into Neo4j.
package y;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.helpers.collection.PrefetchingIterator;
import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import org.neo4j.kernel.impl.util.FileUtils;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
import org.neo4j.unsafe.batchinsert.BatchInserters;
/**
 * Command-line importer that reads a space-separated tuple file and creates
 * one Neo4j node per tuple using the batch inserter.
 *
 * <p>Each node gets a single {@code uri} property holding the first token of
 * the line, and is added to the exact-match Lucene node index {@code pages}.
 *
 * <p>Usage: {@code ImportDbPedia <storeDir> <tupleFile> [maxRows]}.
 * <strong>The store directory is deleted before the import starts.</strong>
 */
public class ImportDbPedia
{
    /** Number of tuples imported when no explicit limit is given on the command line. */
    private static final long DEFAULT_MAX_ROWS = 10000;

    /**
     * Runs the import.
     *
     * @param args {@code args[0]} is the store directory (wiped first),
     *             {@code args[1]} is the tuple file, and the optional
     *             {@code args[2]} overrides the maximum number of tuples
     * @throws IllegalArgumentException if arguments are missing or the tuple
     *                                  file cannot be read
     * @throws Exception                on any failure while importing
     */
    public static void main( String[] args ) throws Exception
    {
        if ( args.length < 2 )
        {
            throw new IllegalArgumentException(
                    "Usage: ImportDbPedia <storeDir> <tupleFile> [maxRows]" );
        }
        String storeDir = args[0];
        String tupleFile = args[1];
        long maxRows = args.length > 2 ? Long.parseLong( args[2] ) : DEFAULT_MAX_ROWS;

        // Check the input BEFORE wiping the store, so that a mistyped file
        // name does not destroy an existing database for nothing.
        if ( !new File( tupleFile ).canRead() )
        {
            throw new IllegalArgumentException( "Cannot read tuple file: " + tupleFile );
        }

        FileUtils.deleteRecursively( new File( storeDir ) );
        BatchInserter inserter = BatchInserters.inserter( storeDir );
        try
        {
            LuceneBatchInserterIndexProvider indexProvider =
                    new LuceneBatchInserterIndexProvider( inserter );
            try
            {
                BatchInserterIndex index = indexProvider.nodeIndex( "pages",
                        MapUtil.stringMap( "type", "exact" ) );
                for ( Tuple tuple : parseFile( tupleFile, maxRows ) )
                {
                    HashMap<String, Object> properties = new HashMap<String, Object>();
                    properties.put( "uri", tuple.getStart() );
                    long node = inserter.createNode( properties );
                    index.add( node, properties );
                }
            }
            finally
            {
                // Index provider first, inserter second: same order as the
                // normal path, but now also executed when the loop throws.
                indexProvider.shutdown();
            }
        }
        finally
        {
            inserter.shutdown();
        }
    }

    /**
     * Lazily parses a tuple file, one {@link Tuple} per data line.
     *
     * <p>Blank lines and lines whose first non-blank character is {@code #}
     * are skipped and do not count towards {@code maxRows}.
     *
     * @param tupleFile path of the file to read
     * @param maxRows   maximum number of tuples any one iterator will return
     * @return an iterable whose iterators each open the file anew; the file
     *         is closed once the iterator is exhausted or the limit is hit
     */
    private static Iterable<Tuple> parseFile( final String tupleFile,
            final long maxRows )
    {
        return new Iterable<Tuple>()
        {
            // The reader is handed over to the returned iterator, which closes it.
            @SuppressWarnings( "resource" )
            public Iterator<Tuple> iterator()
            {
                final BufferedReader reader;
                try
                {
                    reader = new BufferedReader( new FileReader( tupleFile ) );
                }
                catch ( FileNotFoundException e )
                {
                    throw new RuntimeException( e );
                }
                return new PrefetchingIterator<Tuple>()
                {
                    private long rowCount = 0;
                    private boolean finished = false;

                    @Override
                    protected Tuple fetchNextOrNull()
                    {
                        // Guard so that a repeated call never touches the closed reader.
                        if ( finished )
                        {
                            return null;
                        }
                        // Check the limit before reading, so exactly maxRows
                        // tuples are produced (the previous post-increment
                        // comparison let one extra row through).
                        if ( rowCount >= maxRows )
                        {
                            finish();
                            return null;
                        }
                        String line;
                        try
                        {
                            do
                            {
                                line = reader.readLine();
                            }
                            while ( line != null && isSkippable( line ) );
                        }
                        catch ( IOException e )
                        {
                            throw new RuntimeException( e );
                        }
                        if ( line == null )
                        {
                            finish();
                            return null;
                        }
                        rowCount++;
                        return new Tuple( line );
                    }

                    /** Marks the iterator as exhausted and closes the underlying reader. */
                    private void finish()
                    {
                        finished = true;
                        try
                        {
                            reader.close();
                        }
                        catch ( IOException e )
                        {
                            throw new RuntimeException( e );
                        }
                    }
                };
            }
        };
    }

    /**
     * Tells whether a line carries no tuple: it is blank, or it is a comment
     * line starting with {@code #} (the N-Triples comment marker).
     */
    private static boolean isSkippable( String line )
    {
        String trimmed = line.trim();
        return trimmed.length() == 0 || trimmed.charAt( 0 ) == '#';
    }

    /**
     * One parsed line of the tuple file: the first and third space-separated
     * tokens, exposed as start and end.
     */
    static class Tuple
    {
        private final String start;
        private final String end;

        /**
         * Splits {@code line} on single spaces and keeps tokens 0 and 2.
         *
         * @param line a data line with at least three space-separated tokens
         * @throws IllegalArgumentException if the line has fewer than three tokens
         */
        public Tuple( String line )
        {
            // NOTE(review): a third field that itself contains spaces (e.g. a
            // quoted literal) is cut at its first space - confirm whether
            // callers of getEnd() need the full value.
            String[] tokens = line.split( " " );
            if ( tokens.length < 3 )
            {
                throw new IllegalArgumentException(
                        "Expected at least 3 space-separated tokens but found "
                        + tokens.length + " in line: " + line );
            }
            start = tokens[0];
            end = tokens[2];
        }

        /** @return the first token of the line */
        public String getStart()
        {
            return start;
        }

        /** @return the third token of the line */
        public String getEnd()
        {
            return end;
        }
    }
}
<!-- Maven build descriptor (POM model 4.0.0) for the project with coordinates x:y:0.0.1-SNAPSHOT. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>x</groupId>
<artifactId>y</artifactId>
<version>0.0.1-SNAPSHOT</version>
<dependencies>
<!-- The only declared dependency: org.neo4j:neo4j, pinned to 1.9.M04. -->
<!-- NOTE(review): presumably this artifact supplies the org.neo4j.* batch-insert and Lucene index classes imported by ImportDbPedia; confirm against its transitive dependencies. -->
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j</artifactId>
<version>1.9.M04</version>
</dependency>
</dependencies>
</project>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment