Skip to content

Instantly share code, notes, and snippets.

@iwiwi
Last active September 17, 2021 09:32
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save iwiwi/5351417 to your computer and use it in GitHub Desktop.
Save iwiwi/5351417 to your computer and use it in GitHub Desktop.
Download and decode WebGraph format graphs
#!/bin/sh
#
# Decode a WebGraph-format graph into a tab-separated edge list.
# http://webgraph.dsi.unimi.it/
#
# Run this from the directory that holds the WebGraph jars and
# WebGraphDecoder.java: every entry of the current directory is put on the
# Java classpath.
#
# Usage (example):
#
# % ls ~/Desktop/WebGraph
# enron-nat.graph enron-nat.properties
#
# % ./decode.sh ~/Desktop/WebGraph/enron-nat
#
# % ls ~/Desktop/WebGraph
# enron-nat.graph enron-nat.offsets enron-nat.tsv
# enron-nat.obl enron-nat.properties
#
if [ $# -lt 1 ]; then
  echo "Usage: $0 <graph-basename>" >&2
  exit 1
fi

# Join every (non-hidden) entry of the current directory with ':' to form the
# classpath. A glob is used instead of parsing `ls --format=commas`, which is
# GNU-specific and fragile.
CP=
for entry in *; do
  CP="${CP:+$CP:}$entry"
done

# Each step runs only if the previous one succeeded, so a compile failure can
# no longer be followed by running a stale (or missing) WebGraphDecoder class.
javac -cp "$CP" WebGraphDecoder.java &&
java -cp "$CP" it.unimi.dsi.webgraph.BVGraph -o -O -L "$1" &&
java -cp "$CP:." WebGraphDecoder "$1"
#!/bin/bash
#
# Download the .properties, .graph and .md5sums files of a WebGraph dataset.
# http://law.di.unimi.it/datasets.php
#
# Usage:
# % download.sh http://data.law.di.unimi.it/webdata/cnr-2000/cnr-2000
#
if [ $# -lt 1 ]; then
  echo "Usage: $0 <dataset-base-url>" >&2
  exit 1
fi

for ext in .properties .graph .md5sums; do
  # Quoted so that characters such as '?', '*' or spaces in the URL are passed
  # to wget verbatim. -c resumes a partially downloaded file.
  wget -c "$1$ext"
done
import it.unimi.dsi.fastutil.ints.IntArrayFIFOQueue;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.webgraph.GraphClassParser;
import it.unimi.dsi.webgraph.ImmutableGraph;
import it.unimi.dsi.webgraph.LazyIntIterator;
import java.io.*;
import java.util.*;
/**
 * Dumps a WebGraph-format graph as a tab-separated edge list.
 *
 * <p>The graph is loaded from the basename given as the first command-line
 * argument, and one line {@code source<TAB>target} per arc is written to
 * {@code <basename>.tsv}.
 */
public class WebGraphDecoder {
    /**
     * Loads the graph and writes its edge list.
     *
     * @param arg command-line arguments; {@code arg[0]} is the graph basename
     * @throws Exception if the graph cannot be loaded or the output cannot be written
     */
    public static void main(String[] arg) throws Exception {
        if (arg.length < 1) {
            System.err.println("Usage: java WebGraphDecoder <graph-basename>");
            System.exit(1);
        }

        ImmutableGraph graph = ImmutableGraph.load(arg[0]);
        int numNodes = graph.numNodes();
        System.out.printf("Vertices: %d\n", numNodes);
        System.out.printf("Edges: %d\n", graph.numArcs());

        // long, not int: the number of arcs can exceed Integer.MAX_VALUE.
        long numEdges = 0;

        // try-with-resources flushes and closes the file even if iteration fails.
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(arg[0] + ".tsv"))) {
            for (int v = 0; v < numNodes; ++v) {
                LazyIntIterator successors = graph.successors(v);
                // Loop-invariant: look the degree up once per node, not once per arc.
                int degree = graph.outdegree(v);
                for (int i = 0; i < degree; ++i) {
                    int w = successors.nextInt();
                    bw.write(Integer.toString(v));
                    bw.write("\t");
                    bw.write(Integer.toString(w));
                    bw.write("\n");
                    ++numEdges;
                }
            }
        }

        System.out.printf("Output Edges: %d\n", numEdges);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment