Skip to content

Instantly share code, notes, and snippets.

@zed9h
Last active May 12, 2022 22:15
Show Gist options
  • Save zed9h/148874 to your computer and use it in GitHub Desktop.
Save zed9h/148874 to your computer and use it in GitHub Desktop.
Six Degrees of Kevin Bacon [and other queries] using DBpedia
#!/usr/bin/perl
use strict;
use warnings;
use Compress::Raw::Bzip2;
use Graph::Undirected;
use Storable;
use Term::ReadLine; # Term::ReadLine::Gnu
# Turn on autoflush for the currently selected output handle so that the
# "\r"-terminated progress line is displayed as soon as it is printed.
$| = 1;

# --- configuration ---------------------------------------------------------
my $root    = 'Kevin_Bacon';          # dbpedia resource name of an actor
my $input   = 'infobox_en.nt.bz2';    # http://downloads.dbpedia.org/3.3/en/infobox_en.nt.bz2
my $cache1  = '62kevin.g';            # Storable dump of the full movie/actor graph
my $cache2  = '62kevin.sptg';         # Storable dump of the shortest-path tree
my $history = "$ENV{HOME}/.62kevin";  # readline history file

# Actor vertices are stored in the graph with an "a:" prefix (movies use
# "m:"), so the root has to carry the same tag.
$root = 'a:' . $root;
# Obtain $sptg, the Dijkstra shortest-path tree rooted at $root.
#
# Three sources are tried, cheapest first:
#   1. $cache2 - a Storable dump of the finished shortest-path tree;
#   2. $cache1 - a Storable dump of the movie/actor graph, from which the
#                tree is recomputed;
#   3. $input  - the bzip2-compressed N-Triples dump, which is streamed,
#                decompressed and scanned for "starring" triples.
# Whatever had to be computed is written back to its cache file.
my $sptg;
if (-s $cache2) {
    printf "loading cache2 (%d bytes) ...\n", -s $cache2;
    $sptg = retrieve($cache2);
}
else {
    my $g;
    if (-s $cache1) {
        printf "loading cache1 (%d bytes) ...\n", -s $cache1;
        $g = retrieve($cache1);
    }
    else {
        $g = Graph::Undirected->new(countvertexed => 1, countedged => 1);

        # Lexical handle, 3-arg open, and binmode because the data is binary.
        open my $fh, '<', $input or die "$input: $!";
        binmode $fh;

        # The size is the divisor of the progress percentage, so an empty
        # file must be rejected up front.
        my $size = -s $fh;
        die "$input: file is empty\n" unless $size;
        printf "parse input (%d bytes compressed) ...\n", $size;

        # In list context the constructor also returns a status that explains
        # a failure.
        my ($bz, $bz_status) = Compress::Raw::Bunzip2->new;
        die "Cannot create bunzip2 object: $bz_status\n" unless $bz;

        my $last       = 0;     # time of the last progress report
        my $buffer     = '';    # decompressed text not yet split into lines
        my $num_parsed = 0;
        my $num_added  = 0;

        # <movie> <property/starring> <actor> triples; compiled once.
        my $res      = 'http://dbpedia\\.org/resource';
        my $prop     = 'http://dbpedia\\.org/property';
        my $starring = qr{<$res/(.*?)> <$prop/starring> <$res/(.*?)>};

        # Print the progress line; $eol is "" while running and "\n" at the end.
        my $report = sub {
            my ($eol) = @_;
            printf "\r%6.2f%% completed (buffer %d) %d triples inserted of %d parsed.$eol",
                (tell($fh) / $size) * 100, length($buffer), $num_added, $num_parsed;
        };

        # Handle one N-Triples line: add a movie--actor edge if it is a
        # "starring" triple, and refresh the progress line every ~2 seconds.
        my $parse_line = sub {
            my ($line) = @_;
            $num_parsed++;
            if ($line =~ $starring) {
                my ($movie, $actor) = ("m:$1", "a:$2");
                $num_added++;
                $g->add_vertex($movie);
                $g->add_vertex($actor);
                $g->add_edge($movie, $actor);
            }
            if (time() - $last > 2) {
                $last = time();
                $report->('');
            }
        };

        while (1) {
            my $compressed;
            my $got = read($fh, $compressed, 4096);
            die "$input: $!" unless defined $got;    # read error, not EOF
            last if $got == 0;

            my $decompressed;
            my $status = $bz->bzinflate($compressed, $decompressed);
            die "error decompressing: $status"
                unless $status == BZ_OK or $status == BZ_STREAM_END;
            next unless defined $decompressed and length $decompressed;

            # Cut every complete line off the front of the buffer and leave
            # the trailing partial line for the next chunk.
            $buffer .= $decompressed;
            my $cut = rindex($buffer, "\n");
            next if $cut < 0;
            my $complete = substr($buffer, 0, $cut + 1, '');
            while ($complete =~ m{([^\n]*)\n}g) {
                my $line = $1;    # copy before the next match clobbers $1
                $parse_line->($line);
            }
        }

        # A final line without a terminating newline is still data.
        if (length $buffer) {
            my $tail = $buffer;
            $buffer = '';
            $parse_line->($tail);
        }

        $report->("\n");
        close $fh;

        printf "saving cache level1 (%d vertices, %d edges) ...\n",
            scalar($g->vertices), scalar($g->edges);
        # A cache that cannot be written is not worth discarding the parsed
        # graph for, so only warn.
        store($g, $cache1)
            or warn "cannot save $cache1: $!\n";
    }

    printf "Dijkstra single-source shortest path (%d vertices, %d edges) ...\n",
        scalar($g->vertices), scalar($g->edges);
    $sptg = $g->SPT_Dijkstra($root);

    printf "saving cache level2 ...\n";
    store($sptg, $cache2)
        or warn "cannot save $cache2: $!\n";
}
# Interactive loop: read an actor name per line, look it up in the
# shortest-path tree $sptg and print its distance and path to the root.
print "ready.\n";
print "type one dbpedia actor resource name per line:\n";

# $term stays a file-scoped lexical because the END block uses it to save
# the history on exit.
my $term = Term::ReadLine->new('62kevin');

# Best effort: only some Term::ReadLine backends provide ReadHistory.
eval { $term->ReadHistory($history); };

# Root name without its "a:" type prefix, for display. Loop-invariant.
(my $root_name = $root) =~ s{^a:}{};

while (defined(my $name = $term->readline('actor>'))) {
    chomp $name;

    # Drop surrounding whitespace first; otherwise it would be turned into
    # underscores below and the lookup would fail for a valid name.
    $name =~ s{^\s+|\s+$}{}g;
    next unless length $name;

    # Normalise "kevin bacon" to the resource form "Kevin_Bacon".
    $name =~ s{\b(\w)}{uc $1}eg;
    $name =~ s{\s}{_}g;
    #$term->addhistory($name); # XXX automatic on gnu version

    my $node   = "a:$name";
    my $weight = $sptg->get_vertex_attribute($node, 'weight');
    unless (defined $weight) {
        my $message = $node eq $root
            ? "$root_name is the root node, distance 0"
            : "$name not found, may not be connected to $root_name";
        print $message, "\n";
        next;
    }

    # The graph alternates actor and movie vertices, so two edges make one
    # degree of separation.
    my $degrees = int($weight / 2);
    printf "%s is %s degree%s to %s through:\n",
        $name, $degrees, $degrees == 1 ? '' : 's', $root_name;

    # Follow the 'p' vertex attribute from the actor until it is no longer set.
    my $step = $node;
    while ($step = $sptg->get_vertex_attribute($step, 'p')) {
        my $label = $step;
        # Anchored: only the leading type tag is rewritten, never an "m:" or
        # "a:" that happens to occur inside the resource name itself.
        $label =~ s{^m:}{movie: } or $label =~ s{^a:}{actor: };
        print " $label\n";
    }
}
print "\nshutting down ...\n";
# On interpreter exit, try to save the readline history.
# Nothing happens when $term was never created (e.g. the script died while
# loading data). Any failure inside the eval - including a die raised for a
# false WriteHistory result - is deliberately swallowed.
END {
    eval {
        $term->WriteHistory($history)
            or die "$history: $!";
        print "history file $history wrote.\n";
    } if $term;
}
# Actors reachable from Kevin Bacon through a chain of co-starring links,
# together with the step number reported by the transitive traversal.
#
# The inner SELECT yields pairs of actors that share one dbpprop:starring
# subject (i.e. appear in the same film). OPTION (transitive ...) is a
# Virtuoso extension that chains those pairs from ?actor1 to ?actor6.
# NOTE(review): t_min(6) and t_max(6) presumably restrict the result to
# chains of exactly six steps - confirm against the Virtuoso documentation.
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>
SELECT ?actor6 ?step
WHERE {
{ SELECT *
WHERE { [] dbpprop:starring ?actor1, ?actor6 . }
}
OPTION ( transitive,
t_distinct,
t_no_cycles,
t_in (?actor1),
t_out (?actor6),
t_step ('step_no') as ?step,
t_min(6),
t_max(6) ) .
# Start the traversal at Kevin Bacon.
FILTER (?actor1 = dbpedia:Kevin_Bacon)
}
# Histogram of the Kevin Bacon graph: for every step number reported by the
# transitive traversal, the number of result rows at that step.
#
# Innermost SELECT: pairs of actors sharing a dbpprop:starring subject.
# Middle SELECT: transitive traversal (Virtuoso extension) starting at
# Kevin Bacon with no upper bound on the step count, grouped by ?step.
# Outer SELECT: only orders the grouped rows by ?step.
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>
SELECT ?step ?count
WHERE
{
{
SELECT ?step COUNT(*) as ?count
WHERE
{
{
SELECT ?actor1 ?actorN
WHERE { [] dbpprop:starring ?actor1, ?actorN . }
}
OPTION ( transitive,
t_distinct,
t_no_cycles,
t_in (?actor1),
t_out (?actorN),
t_step ('step_no') as ?step,
t_min(1) ) .
FILTER (?actor1 = dbpedia:Kevin_Bacon)
}
GROUP BY ?step
}
}
ORDER BY ?step
# One row per resource whose dbo:genre is dbr:Game_engine, with:
#   ?name - the smallest English rdfs:label
#   ?page - all distinct foaf:homepage values, joined with " | "
#   ?code - all distinct programming languages, joined with ", "
# No PREFIX lines are given, so the query relies on the endpoint predefining
# dbo:, dbr:, dbp:, foaf: and rdfs:. sql:GROUP_CONCAT_DISTINCT is a Virtuoso
# extension.
select
(min(?n) AS ?name),
(sql:GROUP_CONCAT_DISTINCT(?p, " | ") AS ?page),
# Use the English label of the language when one was matched, otherwise the
# raw dbp:programmingLanguage value.
(sql:GROUP_CONCAT_DISTINCT(IF(?ln, ?ln, ?l), ", ") AS ?code)
where {
?s dbo:genre dbr:Game_engine ;
rdfs:label ?n .
FILTER (LANG(?n) = "en")
OPTIONAL {
?s foaf:homepage ?p .
}
OPTIONAL {
?s dbp:programmingLanguage ?l .
OPTIONAL {
?l rdfs:label ?ln .
FILTER (LANG(?ln) = "en")
}
}
}
group by ?s
# Cities whose dbpedia-owl:country is Brazil, ranked by the sum of their
# squared distances to two fixed points. Per city the query also returns the
# smallest matching label, the largest population and the largest elevation.
# NOTE(review): the two st_point arguments look like (longitude, latitude) of
# Sao Paulo and Rio de Janeiro - confirm.
# No PREFIX lines are given, so the query relies on the endpoint predefining
# dbpedia:, dbpedia-owl:, dbpprop:, geo: and rdfs:. The bif: functions are
# Virtuoso built-ins.
select
min( (bif:power(bif:st_distance (?point, bif:st_point(-46.6333, -23.55)), 2) + bif:power(bif:st_distance (?point, bif:st_point(-43.1964, -22.9083)), 2) ) ) as ?rank
min(?label) as ?label
max(?pop) as ?pop
max(?height) as ?height
?city
where {
?city dbpedia-owl:country dbpedia:Brazil ;
geo:geometry ?point ;
dbpprop:elevationM ?height ;
dbpedia-owl:populationTotal ?pop ;
rdfs:label ?label .
# Keep only labels whose language tag matches "pt".
FILTER( langMatches( lang(?label), "pt") )
}
group by ?city
order by ?rank
# Everything that is, directly or through a chain, a subset of :A, with the
# step number ?n at which it was reached.
#
# The inner SELECT treats "?s :is_a_subset_of ?o" and "?o :is_a_superset_of ?s"
# as the same edge. OPTION (transitive ...) is a Virtuoso extension that
# chains those edges from ?s to ?o, requiring at least one step.
# The "define input:inference" line is a Virtuoso pragma naming a rule set.
define input:inference "http://example.com/rules"
PREFIX : <http://example.com#>
SELECT ?s ?n {
FILTER( ?o = :A )
{ SELECT ?s ?o
WHERE {
{ ?s :is_a_subset_of ?o . }
UNION
{ ?o :is_a_superset_of ?s . }
}
} OPTION ( transitive,
t_in(?s),
t_out(?o),
t_distinct,
t_no_cycles,
t_step('step_no') as ?n,
t_min(1) ) .
}
# Episodes that carry a dbpprop:blackboard value, listed with season number,
# episode number and English title, ordered by season and then episode.
# NOTE(review): presumably the chalkboard gags of The Simpsons - the query
# itself does not restrict the series.
# The skos: and dbpedia: prefixes are declared but not used below.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpedia: <http://dbpedia.org/resource/>
PREFIX dbpprop: <http://dbpedia.org/property/>
SELECT ?chalkboard_gag, ?season_no, ?episode_no, ?episode_title
WHERE {
?episode rdfs:label ?episode_title ;
dbpprop:episodeNo ?episode_no ;
dbpprop:season ?season_no ;
dbpprop:blackboard ?chalkboard_gag .
FILTER (langMatches( lang(?episode_title), "EN" ))
# Keep literal values only, and drop placeholders: anything starting with
# "None" (case-insensitive) or consisting of a single "-".
FILTER (isLITERAL(?chalkboard_gag) && !regex(?chalkboard_gag, "^None|^-$", "i"))
}
ORDER BY ?season_no ?episode_no
sparql
# Paths of one to four steps between node n:2210270 and node n:990 in the
# graph <http://lua.net>, following p:edge in either direction.
# Each row reports an intermediate node (?via), its step number (?n) and the
# identifier of the path it belongs to (?path), as produced by the t_step
# options of the Virtuoso transitive extension.
# The leading "sparql" keyword and the closing terminator are isql framing.
prefix n: <http://lua.net/n/>
prefix p: <http://lua.net/p/>
SELECT ?s ?o ?n ?via ?path
FROM <http://lua.net>
{
FILTER( ?o = n:990 )
FILTER( ?s = n:2210270 )
{ SELECT ?s ?o
WHERE {
{ ?s p:edge ?o . }
UNION
{ ?o p:edge ?s . }
}
} OPTION ( transitive,
t_in(?s),
t_out(?o),
t_distinct,
t_no_cycles,
t_step(?s) as ?via,
t_step('step_no') as ?n,
t_step('path_id') as ?path,
t_min(1),
t_max(4)
) .
}
;
sparql # clear test environment
# Empties the graph <http://test.com/> so the load below starts from scratch.
clear graph <http://test.com/>;
sparql # load test data
# Small class hierarchy with colors, used by the two "closest color" queries.
# No owl:, rdfs: or rdf: prefix is declared here, so the statement relies on
# the server predefining them.
prefix : <http://test.com/>
insert data into graph <http://test.com/> {
# Five classes.
:A a owl:Class .
:B a owl:Class .
:C a owl:Class .
:D a owl:Class .
:E a owl:Class .
# Hierarchy: C under A, and E under D under B. A and B sit under owl:Thing.
:A rdfs:subClassOf owl:Thing .
:B rdfs:subClassOf owl:Thing .
:C rdfs:subClassOf :A .
:D rdfs:subClassOf :B .
:E rdfs:subClassOf :D .
# Colors. Class D has none of its own.
:A :color "red" .
:B :color "blue" .
:C :color "green" .
:E :color "black" .
# One individual per class.
:a rdf:type :A .
:b rdf:type :B .
:c rdf:type :C .
:d rdf:type :D .
:e rdf:type :E .
};
sparql # show the closest colors of one individual
# Starts at the class of individual :e, walks rdfs:subClassOf transitively
# (Virtuoso extension) and keeps every class on the way that has a :color.
# t_min(0) lets the starting class itself take part. Ordering by the step
# number ?n and keeping one row returns the color found nearest to :e.
define input:default-graph-uri <http://test.com/>
prefix : <http://test.com/>
SELECT ?color
WHERE
{
:e a ?c0 .
?c0 rdfs:subClassOf ?c1 option (transitive, t_distinct, t_step('step_no') as ?n, t_in(?c0), t_out(?c1), t_min(0)) .
?c1 :color ?color .
}
order by ?n
limit 1;
sparql # show all the closest colors of each class
# For every owl:Class ?c0, walks rdfs:subClassOf transitively (Virtuoso
# extension, t_min(0) so the class itself is included) and collects the
# colors found together with their step number ?n.
# The aggregate packs each (step, color) pair into a vector, takes the
# minimum vector per class and then reads element 1 of it, i.e. the color.
# NOTE(review): this assumes min() compares vectors by their first element
# (the step number) - confirm against the Virtuoso documentation.
define input:default-graph-uri <http://test.com/>
prefix : <http://test.com/>
SELECT ?c0 (bif:aref(min(bif:vector(?n, ?color)),1)) as ?v # interesting vector aggregation
WHERE
{
?c0 a owl:Class .
?c0 rdfs:subClassOf ?c1 option (transitive, t_distinct, t_step('step_no') as ?n, t_in(?c0), t_out(?c1), t_min(0)) .
?c1 :color ?color .
}
group by ?c0;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment