Created
February 15, 2013 09:42
-
-
Save nawroth/4959405 to your computer and use it in GitHub Desktop.
Importing DBpedia into Neo4j.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package y; | |
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Iterator;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.helpers.collection.PrefetchingIterator;
import org.neo4j.index.lucene.unsafe.batchinsert.LuceneBatchInserterIndexProvider;
import org.neo4j.kernel.impl.util.FileUtils;
import org.neo4j.unsafe.batchinsert.BatchInserter;
import org.neo4j.unsafe.batchinsert.BatchInserterIndex;
import org.neo4j.unsafe.batchinsert.BatchInserters;
public class ImportDbPedia | |
{ | |
public static void main( String[] args ) throws Exception | |
{ | |
String storeDir = args[0]; | |
String tupleFile = args[1]; | |
FileUtils.deleteRecursively( new File( storeDir ) ); | |
BatchInserter inserter = BatchInserters.inserter( storeDir ); | |
LuceneBatchInserterIndexProvider indexProvider = new LuceneBatchInserterIndexProvider( | |
inserter ); | |
BatchInserterIndex index = indexProvider.nodeIndex( "pages", | |
MapUtil.stringMap( "type", "exact" ) ); | |
for ( Tuple tuple : parseFile( tupleFile, 10000 ) ) | |
{ | |
HashMap<String, Object> properties = new HashMap<String, Object>(); | |
properties.put( "uri", tuple.getStart() ); | |
long node = inserter.createNode( properties ); | |
index.add( node, properties ); | |
} | |
indexProvider.shutdown(); | |
inserter.shutdown(); | |
} | |
private static Iterable<Tuple> parseFile( final String tupleFile, | |
final long maxRows ) | |
{ | |
return new Iterable<ImportDbPedia.Tuple>() | |
{ | |
@SuppressWarnings( "resource" ) | |
public Iterator<Tuple> iterator() | |
{ | |
final BufferedReader reader; | |
try | |
{ | |
reader = new BufferedReader( new FileReader( tupleFile ) ); | |
} | |
catch ( FileNotFoundException e ) | |
{ | |
throw new RuntimeException( e ); | |
} | |
return new PrefetchingIterator<Tuple>() | |
{ | |
long rowCount = 0; | |
@Override | |
protected Tuple fetchNextOrNull() | |
{ | |
String line; | |
try | |
{ | |
line = reader.readLine(); | |
} | |
catch ( IOException e ) | |
{ | |
throw new RuntimeException( e ); | |
} | |
if ( line == null || rowCount++ > maxRows ) | |
{ | |
try | |
{ | |
reader.close(); | |
} | |
catch ( IOException e ) | |
{ | |
throw new RuntimeException( e ); | |
} | |
return null; | |
} | |
return new Tuple( line ); | |
} | |
}; | |
} | |
}; | |
} | |
static class Tuple | |
{ | |
private String start; | |
private String end; | |
public Tuple( String line ) | |
{ | |
String[] tokens = line.split( " " ); | |
start = tokens[0]; | |
end = tokens[2]; | |
} | |
public String getStart() | |
{ | |
return start; | |
} | |
public String getEnd() | |
{ | |
return end; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!--
  Minimal Maven POM: project coordinates x:y:0.0.1-SNAPSHOT with a single
  dependency on org.neo4j:neo4j, version 1.9.M04.
-->
<project
    xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>x</groupId>
  <artifactId>y</artifactId>
  <version>0.0.1-SNAPSHOT</version>

  <dependencies>
    <dependency>
      <groupId>org.neo4j</groupId>
      <artifactId>neo4j</artifactId>
      <version>1.9.M04</version>
    </dependency>
  </dependencies>

</project>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment