Skip to content

Instantly share code, notes, and snippets.

@pbloem
Created November 12, 2020 12:56
Show Gist options
  • Save pbloem/c5a815759bebb4b0b3ada63fdb47f98d to your computer and use it in GitHub Desktop.
Save pbloem/c5a815759bebb4b0b3ada63fdb47f98d to your computer and use it in GitHub Desktop.
package org.submassive;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import it.unimi.dsi.webgraph.labelling.ArcLabelledImmutableGraph;
import it.unimi.dsi.webgraph.labelling.ArcLabelledNodeIterator.LabelledArcIterator;
import it.unimi.dsi.webgraph.labelling.GammaCodedIntLabel;
import it.unimi.dsi.webgraph.labelling.Label;
import java.util.ArrayList;
import java.util.Deque;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.nio.file.Files;
import java.util.stream.*;
class ShuaiLoader extends ArcLabelledImmutableGraph {
private File triples;
final private GammaCodedIntLabel prototype;
final private List<Integer> outdegrees = new ArrayList<Integer>();
private class Triple
{
private final int a, b, c;
public Triple(int a, int b, int c)
{
this.a = a; this.b = b; this.c = c;
}
public int a() {return a;}
public int b() {return b;}
public int c() {return c;}
}
/**
* Iterate over all triples in the source file.
*
* @author peter
*
*/
private class TripleIterator implements Iterator<Triple>
{
private final int BUFFER_SIZE = 5000;
private BufferedReader input;
private Deque<Triple> buffer = new LinkedList<Triple>();
public TripleIterator()
{
try {
this.input = new BufferedReader(new FileReader(triples));
} catch(IOException e)
{
throw new RuntimeException(e);
}
}
public boolean hasNext()
{
check();
return ! buffer.isEmpty();
}
public Triple next()
{
check();
return buffer.pop();
}
private void check()
{
try {
while(buffer.size() < BUFFER_SIZE)
{
String line = input.readLine();
String[] elem = line.trim().split("\\s+", 3);
assert elem.length == 3;
int a = Integer.parseInt(elem[0]);
int b = Integer.parseInt(elem[1]);
int c = Integer.parseInt(elem[2]);
buffer.add(new Triple(a, b, c));
}
} catch(IOException e)
{
throw new RuntimeException(e);
}
}
}
public ShuaiLoader(File triples)
{
this.triples = triples;
this.prototype = new GammaCodedIntLabel("FOO");
// - Not entirely sure what this does, but it works for the IntegerTriples example.
// - Loop to count the outdegrees
Iterator<Triple> it = new TripleIterator();
while(it.hasNext())
{
Triple t = it.next();
int a = t.a(); int b = t.b(); int c = t.c();
ensure(outdegrees, a);
outdegrees.set(a, outdegrees.get(a) + 1);
}
}
/**
* Ensure that the given list has the given capacity (fill with zeros)
* @param list
* @param max
*/
private void ensure(List<Integer> list, int max)
{
while(list.size() < max + 1)
list.add(0);
}
@Override
public ArcLabelledImmutableGraph copy()
{
return new ShuaiLoader(triples);
}
@Override
public LabelledArcIterator successors(int x)
{
/**
* This function is the only sticking point. It seems we have to be able to generate all successors (outgoing neighbors)
* of a given node on the fly.
*
* If we make sure that the triples in the file are sorted by source node, I think we can compute the starting point (in nr of
* bytes into the file) or each node and start the TripleTierator from there.
*
*/
return null;
}
@Override
public Label prototype()
{
return prototype;
}
@Override
public int numNodes()
{
return outdegrees.size();
}
@Override
public boolean randomAccess()
{
return false;
// -- No random access allowed (only iteration)
}
@Override
public int outdegree(int i)
{
return outdegrees.get(i);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment