nichtich/skosc2csv.pl

## skosc2csv.pl
#!/usr/bin/perl

use strict;
use warnings;
use utf8;

=head1 NAME

skosc2csv.pl - Create CSV overview of a SKOS classification

=head1 SYNOPSIS

skosc2csv.pl [ NTriples file ]

=cut

use Pod::Usage;
use Text::CSV; #::Encoded;
use Encode qw(decode);

our $VERSION = '0.12';

my $arg = shift @ARGV;
my $input = \*STDIN;
my $output = \*STDOUT;

if ( $arg ) {
    pod2usage(1) if $arg =~ /^(-\?|-h|--help)$/;
    pod2usage(-verbose => 2) if $arg =~ /^-?-man$/;
    open($input, '<', $arg) || die "Failed to open file $arg";
}

binmode $output, ':utf8';

=head1 DESCRIPTION

This script reads RDF data in NTriples format and creates an overview
of the classification that is encoded in this RDF data in SKOS format.

=head2 Input format

The input (read from STDIN or from a file that is specified as first
parameter) must be RDF in NTriples format. All but the following RDF
predicates are ignored:

=over 4

=item rdf:type

=item skos:notation

=item skos:prefLabel

=item dcterms:title

Used if no prefLabel was found for some concept.

=item skos:broader

=item dcterms:subject

=item dcterms:extent

=item foaf:page

=back

=head2 Output format

The overview is given as CSV file with one class per line and the
following columns:

=over 4

=item level

The class' hierarchy level, starting with 1.

=item path

The class' broader classes as path of notations, separated by C<E<lt>>.
For instane the class C<MT 87> (Community music) of the Library of Congress
classification is located at C<M E<lt> MT>.

=item

=back


=head1 EXAMPLE

The following examples uses blank RDF nodes C<_:lccM>, C<_:c1>, etc. for resources.
In practise at least your classes should have stable URIs.

=head2 input in NTriples format

  <_:lccM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
  <_:lccM> <http://www.w3.org/2008/05/skos#notation> "M" .
  <_:lccM> <http://www.w3.org/2008/05/skos#prefLabel> "Music"@en .
  <_:lccMT> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
  <_:lccMT> <http://www.w3.org/2008/05/skos#notation> "MT" .
  <_:lccMT> <http://www.w3.org/2008/05/skos#broader> <_:lccM> .
  <_:lccMT> <http://www.w3.org/2008/05/skos#prefLabel> "Musical instruction and study"@en .
  <_:lccMT87> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
  <_:lccMT87> <http://www.w3.org/2008/05/skos#notation> "MT 87" .
  <_:lccMT87> <http://www.w3.org/2008/05/skos#broader> <_:lccM> .
  <_:lccMT87> <http://www.w3.org/2008/05/skos#prefLabel> "Community music"@en .
  <_:c1> <http://purl.org/dc/terms/extent> "50"^^<http://www.w3.org/2001/XMLSchema#integer> .
  <_:c1> <http://purl.org/dc/terms/subject> <_:lccMT> .
  <_:c2> <http://purl.org/dc/terms/subject> <_:lccMT87> .
  <_:c2> <http://purl.org/dc/terms/extent> "30"^^<http://www.w3.org/2001/XMLSchema#integer> .

=head2 input in Turtle format

Turtle format is much easier to read. You can also provide your classification
data in Turtle format and convert it, for instance with the 'rapper' command
line tool.

  @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
  @prefix skos: <http://www.w3.org/2008/05/skos#> .
  @prefix dct: <http://purl.org/dc/terms/> .

  # classes of the classification

  <_:lccM> a skos:Concept ;
      skos:notation "M" ;
      skos:prefLabel "Music"@en .

  <_:lccMT> a skos:Concept ;
      skos:notation "MT" ;
      skos:prefLabel "Musical instruction and study"@en ;
      skos:broader <_:lccM> .

  <_:lccMT87> a skos:Concept ;
      skos:notation "MT 87" ;
      skos:prefLabel "Community music"@en ;
      skos:broader <_:lccM> .

  # collections for some of the classes

  <_:c1> dct:subject <_:lccMT> ;
        dct:extent 50 .

  <_:c2> dct:subject <_:lccMT87> ;
        dct:extent 30 .

=head2 output as CSV

  "level","path","notation","label"
  "1","","M","Music",,"80"
  "2","M","MT 87","Community music","30","30"
  "2","M","MT","Musical instruction and study","50","50"

=head1 TODO

The current version does not use skos:prefLabel but skos:prefLabel.
Some classifications do not use unique notations (for instance
L<dewey.info>.

=cut

my %predicates = (
    'http://www.w3.org/2008/05/skos#notation' => 'notation',
    'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' => 'a',
    'http://purl.org/dc/terms/subject' => 'subject',
    'http://purl.org/dc/terms/title' => 'title',
    'http://purl.org/dc/elements/1.1/subject' => 'subject',
    'http://purl.org/dc/elements/1.1/title' => 'title',
    'http://www.w3.org/2008/05/skos#broader' => 'broader',
    'http://www.w3.org/2008/05/skos#prefLabel' => 'label',
    'http://xmlns.com/foaf/0.1/page' => 'page',
    'http://purl.org/dc/terms/extent' => 'extent'
);

my $triples = {};

###
### Parse NTriples format into a simple hash. This code is based on code
### from L<RDF::Trine::Parser::NTriples> to avoid including RDF::Trine.
### Using RDF::Trine would simplify this script, but add many dependencies.
###

my $lineno = 0;
while (defined(my $line = <$input>)) {
LINE:
    ($line, my @extra) = split(/\r\n|\r|\n/, $line, 2);
    $lineno++;

    next unless (defined($line) and length($line));
    next unless ($line =~ /\S/);
    chomp($line);
    $line =~ s/^\s*//;
    $line =~ s/\s*$//;
    next if ($line =~ /^#/);

    my @nodes = ();
    for my $which (qw(subject predicate object)) {
        my $node;

        # eat one node
        $line =~ s/^\s*//;
        my $char = length($line) ? substr($line, 0, 1) : '.';
        if ($char eq '.') {
            # ignore
        } elsif ($char eq '<') {
            my ($uri) = $line =~ m/^<([^>]*)>/;
            substr($line, 0, length($uri)+2) = '';
            $node = $uri; # TODO: _unescape($uri, $lineno) );
        } elsif ($char eq '_') {
            my ($name) = $line =~ m/^_:([A-Za-z][A-Za-z0-9]*)/;
            substr($line, 0, length($name)+2) = '';
            $node = "_:$name";
        } elsif ($char eq '"') {
            substr($line, 0, 1) = '';
            my $value = decode('utf8', '');
            while (length($line) and substr($line, 0, 1) ne '"') {
                while ($line =~ m/^([^"\\]+)/) {
                    $value .= $1;
                    substr($line,0,length($1))  = '';
                }
                if (substr($line,0,1) eq '\\') {
                    while ($line =~ m/^\\(.)/) {
                        if ($1 eq 't') {
                            $value .= "\t";
                            substr($line,0,2)   = '';
                        } elsif ($1 eq 'r') {
                            $value .= "\r";
                            substr($line,0,2)   = '';
                        } elsif ($1 eq 'n') {
                            $value .= "\n";
                            substr($line,0,2)   = '';
                        } elsif ($1 eq '"') {
                            $value .= '"';
                            substr($line,0,2)   = '';
                        } elsif ($1 eq '\\') {
                            $value .= "\\";
                            substr($line,0,2)   = '';
                        } elsif ($1 eq 'u') {
                            $line =~ m/^\\u([0-9A-F]{4})/ or die qq[Bad N-Triples \\u escape at line $lineno, near "$line"];
                            $value .= chr(oct('0x' . $1));
                            substr($line,0,6)   = '';
                        } elsif ($1 eq 'U') {
                            $line =~ m/^\\U([0-9A-F]{8})/ or die qq[Bad N-Triples \\U escape at line $lineno, near "$line"];
                            $value .= chr(oct('0x' . $1));
                            substr($line,0,10)  = '';
                        } else {
                            die qq[Not valid N-Triples escape character '\\$1' at line $lineno, near "$line"];
                        }
                    }
                }
            }
            if (substr($line,0,1) eq '"') {
                substr($line,0,1) = '';
            } else {
                die qq[Ending double quote not found at line $lineno];
            }

            if ($line =~ m/^@([a-z]+(-[a-zA-Z0-9]+)*)/) {
                my $lang = $1;
                substr($line,0,1+length($lang)) = '';
                # ignore the language
                $node = $value;
            } elsif (substr($line,0,3) eq '^^<') {
                substr($line,0,3)   = '';
                my ($uri) = $line =~ m/^([^>]*)>/;
                substr($line, 0, length($uri)+1) = '';
                # ignore the datatype
                $node = $value;
            } else {
                $node = $value;
            }
        } else {
            die qq[Not valid N-Triples node start character '$char' at line $lineno, near "$line"];
        }

        die "expected $which at line $lineno" unless $node;
        push(@nodes, $node);
        $line =~ s/^\s*//;
    }
    $line =~ s/^\s//g;
    unless ($line eq '.') {
        die "Missing expected '.' at line $lineno";
    }

    my $subj = $nodes[0];
    my $pred = $predicates{ $nodes[1] };
    my $obj  = $nodes[2];
    if ( $pred ) {
        if ( not exists $triples->{$subj} ) {
            $triples->{$subj} = { $pred => [ $obj ] };
        #} elsif ( not exists $triples->{$subj}->{$pred} ) {
        } else {
            push @{ $triples->{$subj}->{$pred} }, $obj;
        }
    }

    if (@extra) {
        $line = shift(@extra);
        goto LINE;
    }
}


###
### Collect skos:Concepts and associated collections
###

my $concepts = {};
my @top_concepts; # notations of top concepts
my $collections = {};

# check whether a resource is a skos:Concept
sub isa_concept {
    return unless $_[0] and $_[0]->{a};
    return grep { $_ eq 'http://www.w3.org/2008/05/skos#Concept' }
        @{$_[0]->{a}};
}

# chek whether an object is unique for its subject and predicate
sub get_uniqueobject {
    return $_[0]->[0] if $_[0] and @{$_[0]} == 1;
}

sub get_concept_notation {
    my $uri = shift;
    my $concept = $triples->{$uri};
    return unless isa_concept( $concept );
    return get_uniqueobject( $concept->{notation} );
}

while ( my ($uri,$c) = each %$triples ) {
    my $message = "";

    if ( isa_concept($c) ) {
        my $notation = get_uniqueobject( $c->{notation} );
        my $label    = get_uniqueobject( $c->{label} );
        unless ( defined $label ) {
            $label = get_uniqueobject( $c->{title} );
        }
        if ( defined $notation and defined $label ) {
            die "Notation $notation is not unique"
                if $concepts->{ $notation };
            $concepts->{ $notation } = {
                label => $label,
                uri => $uri
            };
            if ( $c->{broader} ) {
                my $b_uri = $c->{broader}->[0];
                my $broader = get_concept_notation( $b_uri );
                if ( $broader ) {
                    $concepts->{ $notation }->{broader} = $broader;
                } else {
                    $message = "Concept or notation not found: "
                             . $b_uri;
                }
            } else {
                push @top_concepts, $notation;
            }
        } else {
            $message = "$uri must have exactely one notation and one label";
        }
    } elsif ( $c->{subject} ) {

        # We may change this limitation in a future version:
        my $c_uri = get_uniqueobject( $c->{subject} );

        if ($c_uri) {
            my $notation = get_concept_notation( $c_uri );
            if ( $notation ) {
               push @{ $collections->{$notation} },
                    { map { $_ => $c->{$_}->[0] } keys %$c };
            } else {
                $message = "$c_uri (dct:subject of $uri) must be "
                         . "a skos:Concept with one skos:notation.";
            }
        } else {
            $message = "$uri must have only one subject";
        }
    } else {
        next; # ignore all other resources
    }

    if ( $message ) {
        print STDERR $message . "\n";
    }
}

###
### For now we only want one collection per concept
###

while ( my ($notation, $c) = each %$collections ) {
    if (@$c != 1) {
        print STDERR "$notation should only have one collection.\n";
        next;
    }
    # We will add 'foaf:page' in a later version
    $concepts->{$notation}->{extent} = $c->[0]->{extent};
}

###
### Now we have a nice hashref $concepts to validate
###

while ( my ($notation, $c) = each %$concepts ) {
    next unless $c->{broader};
    push @{ $concepts->{ $c->{broader} }->{narrower} }, $notation;
}

sub traverse {
    my $c = $concepts->{$_[0]};
    if ( $c->{visited} ) {
        die "there is a loop or diamond in your 'tree'";
        exit;
    }
    $c->{visited} = 1;
    $c->{size} = $c->{extent} unless $c->{size};
    return unless $c->{narrower};
    foreach my $n (@{$c->{narrower}}) {
        traverse( $n );
        $c->{size} += $concepts->{$n}->{size} if ($concepts->{$n}->{size});
    }
}

traverse( $_ ) for @top_concepts;

delete $_->{visited} for values %$concepts;

my $csv = Text::CSV->new({binary => 1,always_quote=>1});

# notation, path, level
sub print_stat {
    my $n = shift;
    my $path = shift || "";
    my $level = shift || 1;

    my $c = $concepts->{$n};

    my @row = ($level, $path, $n, $c->{label}, $c->{extent}, $c->{size} );

    $csv->print( $output, \@row );
    print $output "\n";

    $path = "$path > " if $path;
    foreach my $narrower (@{$c->{narrower}}) {
        print_stat ( $narrower, "$path$n", $level+1 );
    }
}

$csv->print( $output, [qw(level path notation label)] );
print $output "\n";

foreach my $n (sort @top_concepts) {
    print_stat( $n );
}

=head1 COPYRIGHT

Copyright (c) 2010 Jakob Voss. All rights reserved. This program is
free software; you can redistribute it and/or modify it under the
same terms as Perl itself.

=cut
	#!/usr/bin/perl

	use strict;
	use warnings;
	use utf8;

	=head1 NAME

	skosc2csv.pl - Create CSV overview of a SKOS classification

	=head1 SYNOPSIS

	skosc2csv.pl [ NTriples file ]

	=cut

	use Pod::Usage;
	use Text::CSV; #::Encoded;
	use Encode qw(decode);

	our $VERSION = '0.12';

	my $arg = shift @ARGV;
	my $input = \*STDIN;
	my $output = \*STDOUT;

	if ( $arg ) {
	pod2usage(1) if $arg =~ /^(-\?\|-h\|--help)$/;
	pod2usage(-verbose => 2) if $arg =~ /^-?-man$/;
	open($input, '<', $arg) \|\| die "Failed to open file $arg";
	}

	binmode $output, ':utf8';

	=head1 DESCRIPTION

	This script reads RDF data in NTriples format and creates an overview
	of the classification that is encoded in this RDF data in SKOS format.

	=head2 Input format

	The input (read from STDIN or from a file that is specified as first
	parameter) must be RDF in NTriples format. All but the following RDF
	predicates are ignored:

	=over 4

	=item rdf:type

	=item skos:notation

	=item skos:prefLabel

	=item dcterms:title

	Used if no prefLabel was found for some concept.

	=item skos:broader

	=item dcterms:subject

	=item dcterms:extent

	=item foaf:page

	=back

	=head2 Output format

	The overview is given as CSV file with one class per line and the
	following columns:

	=over 4

	=item level

	The class' hierarchy level, starting with 1.

	=item path

	The class' broader classes as path of notations, separated by C<E<lt>>.
	For instane the class C<MT 87> (Community music) of the Library of Congress
	classification is located at C<M E<lt> MT>.

	=item

	=back


	=head1 EXAMPLE

	The following examples uses blank RDF nodes C<_:lccM>, C<_:c1>, etc. for resources.
	In practise at least your classes should have stable URIs.

	=head2 input in NTriples format

	<_:lccM> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
	<_:lccM> <http://www.w3.org/2008/05/skos#notation> "M" .
	<_:lccM> <http://www.w3.org/2008/05/skos#prefLabel> "Music"@en .
	<_:lccMT> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
	<_:lccMT> <http://www.w3.org/2008/05/skos#notation> "MT" .
	<_:lccMT> <http://www.w3.org/2008/05/skos#broader> <_:lccM> .
	<_:lccMT> <http://www.w3.org/2008/05/skos#prefLabel> "Musical instruction and study"@en .
	<_:lccMT87> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2008/05/skos#Concept> .
	<_:lccMT87> <http://www.w3.org/2008/05/skos#notation> "MT 87" .
	<_:lccMT87> <http://www.w3.org/2008/05/skos#broader> <_:lccM> .
	<_:lccMT87> <http://www.w3.org/2008/05/skos#prefLabel> "Community music"@en .
	<_:c1> <http://purl.org/dc/terms/extent> "50"^^<http://www.w3.org/2001/XMLSchema#integer> .
	<_:c1> <http://purl.org/dc/terms/subject> <_:lccMT> .
	<_:c2> <http://purl.org/dc/terms/subject> <_:lccMT87> .
	<_:c2> <http://purl.org/dc/terms/extent> "30"^^<http://www.w3.org/2001/XMLSchema#integer> .

	=head2 input in Turtle format

	Turtle format is much easier to read. You can also provide your classification
	data in Turtle format and convert it, for instance with the 'rapper' command
	line tool.

	@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
	@prefix skos: <http://www.w3.org/2008/05/skos#> .
	@prefix dct: <http://purl.org/dc/terms/> .

	# classes of the classification

	<_:lccM> a skos:Concept ;
	skos:notation "M" ;
	skos:prefLabel "Music"@en .

	<_:lccMT> a skos:Concept ;
	skos:notation "MT" ;
	skos:prefLabel "Musical instruction and study"@en ;
	skos:broader <_:lccM> .

	<_:lccMT87> a skos:Concept ;
	skos:notation "MT 87" ;
	skos:prefLabel "Community music"@en ;
	skos:broader <_:lccM> .

	# collections for some of the classes

	<_:c1> dct:subject <_:lccMT> ;
	dct:extent 50 .

	<_:c2> dct:subject <_:lccMT87> ;
	dct:extent 30 .

	=head2 output as CSV

	"level","path","notation","label"
	"1","","M","Music",,"80"
	"2","M","MT 87","Community music","30","30"
	"2","M","MT","Musical instruction and study","50","50"

	=head1 TODO

	The current version does not use skos:prefLabel but skos:prefLabel.
	Some classifications do not use unique notations (for instance
	L<dewey.info>.

	=cut

	my %predicates = (
	'http://www.w3.org/2008/05/skos#notation' => 'notation',
	'http://www.w3.org/1999/02/22-rdf-syntax-ns#type' => 'a',
	'http://purl.org/dc/terms/subject' => 'subject',
	'http://purl.org/dc/terms/title' => 'title',
	'http://purl.org/dc/elements/1.1/subject' => 'subject',
	'http://purl.org/dc/elements/1.1/title' => 'title',
	'http://www.w3.org/2008/05/skos#broader' => 'broader',
	'http://www.w3.org/2008/05/skos#prefLabel' => 'label',
	'http://xmlns.com/foaf/0.1/page' => 'page',
	'http://purl.org/dc/terms/extent' => 'extent'
	);

	my $triples = {};

	###
	### Parse NTriples format into a simple hash. This code is based on code
	### from L<RDF::Trine::Parser::NTriples> to avoid including RDF::Trine.
	### Using RDF::Trine would simplify this script, but add many dependencies.
	###

	my $lineno = 0;
	while (defined(my $line = <$input>)) {
	LINE:
	($line, my @extra) = split(/\r\n\|\r\|\n/, $line, 2);
	$lineno++;

	next unless (defined($line) and length($line));
	next unless ($line =~ /\S/);
	chomp($line);
	$line =~ s/^\s*//;
	$line =~ s/\s*$//;
	next if ($line =~ /^#/);

	my @nodes = ();
	for my $which (qw(subject predicate object)) {
	my $node;

	# eat one node
	$line =~ s/^\s*//;
	my $char = length($line) ? substr($line, 0, 1) : '.';
	if ($char eq '.') {
	# ignore
	} elsif ($char eq '<') {
	my ($uri) = $line =~ m/^<([^>]*)>/;
	substr($line, 0, length($uri)+2) = '';
	$node = $uri; # TODO: _unescape($uri, $lineno) );
	} elsif ($char eq '_') {
	my ($name) = $line =~ m/^_:([A-Za-z][A-Za-z0-9]*)/;
	substr($line, 0, length($name)+2) = '';
	$node = "_:$name";
	} elsif ($char eq '"') {
	substr($line, 0, 1) = '';
	my $value = decode('utf8', '');
	while (length($line) and substr($line, 0, 1) ne '"') {
	while ($line =~ m/^([^"\\]+)/) {
	$value .= $1;
	substr($line,0,length($1)) = '';
	}
	if (substr($line,0,1) eq '\\') {
	while ($line =~ m/^\\(.)/) {
	if ($1 eq 't') {
	$value .= "\t";
	substr($line,0,2) = '';
	} elsif ($1 eq 'r') {
	$value .= "\r";
	substr($line,0,2) = '';
	} elsif ($1 eq 'n') {
	$value .= "\n";
	substr($line,0,2) = '';
	} elsif ($1 eq '"') {
	$value .= '"';
	substr($line,0,2) = '';
	} elsif ($1 eq '\\') {
	$value .= "\\";
	substr($line,0,2) = '';
	} elsif ($1 eq 'u') {
	$line =~ m/^\\u([0-9A-F]{4})/ or die qq[Bad N-Triples \\u escape at line $lineno, near "$line"];
	$value .= chr(oct('0x' . $1));
	substr($line,0,6) = '';
	} elsif ($1 eq 'U') {
	$line =~ m/^\\U([0-9A-F]{8})/ or die qq[Bad N-Triples \\U escape at line $lineno, near "$line"];
	$value .= chr(oct('0x' . $1));
	substr($line,0,10) = '';
	} else {
	die qq[Not valid N-Triples escape character '\\$1' at line $lineno, near "$line"];
	}
	}
	}
	}
	if (substr($line,0,1) eq '"') {
	substr($line,0,1) = '';
	} else {
	die qq[Ending double quote not found at line $lineno];
	}

	if ($line =~ m/^@([a-z]+(-[a-zA-Z0-9]+)*)/) {
	my $lang = $1;
	substr($line,0,1+length($lang)) = '';
	# ignore the language
	$node = $value;
	} elsif (substr($line,0,3) eq '^^<') {
	substr($line,0,3) = '';
	my ($uri) = $line =~ m/^([^>]*)>/;
	substr($line, 0, length($uri)+1) = '';
	# ignore the datatype
	$node = $value;
	} else {
	$node = $value;
	}
	} else {
	die qq[Not valid N-Triples node start character '$char' at line $lineno, near "$line"];
	}

	die "expected $which at line $lineno" unless $node;
	push(@nodes, $node);
	$line =~ s/^\s*//;
	}
	$line =~ s/^\s//g;
	unless ($line eq '.') {
	die "Missing expected '.' at line $lineno";
	}

	my $subj = $nodes[0];
	my $pred = $predicates{ $nodes[1] };
	my $obj = $nodes[2];
	if ( $pred ) {
	if ( not exists $triples->{$subj} ) {
	$triples->{$subj} = { $pred => [ $obj ] };
	#} elsif ( not exists $triples->{$subj}->{$pred} ) {
	} else {
	push @{ $triples->{$subj}->{$pred} }, $obj;
	}
	}

	if (@extra) {
	$line = shift(@extra);
	goto LINE;
	}
	}


	###
	### Collect skos:Concepts and associated collections
	###

	my $concepts = {};
	my @top_concepts; # notations of top concepts
	my $collections = {};

	# check whether a resource is a skos:Concept
	sub isa_concept {
	return unless $_[0] and $_[0]->{a};
	return grep { $_ eq 'http://www.w3.org/2008/05/skos#Concept' }
	@{$_[0]->{a}};
	}

	# chek whether an object is unique for its subject and predicate
	sub get_uniqueobject {
	return $_[0]->[0] if $_[0] and @{$_[0]} == 1;
	}

	sub get_concept_notation {
	my $uri = shift;
	my $concept = $triples->{$uri};
	return unless isa_concept( $concept );
	return get_uniqueobject( $concept->{notation} );
	}

	while ( my ($uri,$c) = each %$triples ) {
	my $message = "";

	if ( isa_concept($c) ) {
	my $notation = get_uniqueobject( $c->{notation} );
	my $label = get_uniqueobject( $c->{label} );
	unless ( defined $label ) {
	$label = get_uniqueobject( $c->{title} );
	}
	if ( defined $notation and defined $label ) {
	die "Notation $notation is not unique"
	if $concepts->{ $notation };
	$concepts->{ $notation } = {
	label => $label,
	uri => $uri
	};
	if ( $c->{broader} ) {
	my $b_uri = $c->{broader}->[0];
	my $broader = get_concept_notation( $b_uri );
	if ( $broader ) {
	$concepts->{ $notation }->{broader} = $broader;
	} else {
	$message = "Concept or notation not found: "
	. $b_uri;
	}
	} else {
	push @top_concepts, $notation;
	}
	} else {
	$message = "$uri must have exactely one notation and one label";
	}
	} elsif ( $c->{subject} ) {

	# We may change this limitation in a future version:
	my $c_uri = get_uniqueobject( $c->{subject} );

	if ($c_uri) {
	my $notation = get_concept_notation( $c_uri );
	if ( $notation ) {
	push @{ $collections->{$notation} },
	{ map { $_ => $c->{$_}->[0] } keys %$c };
	} else {
	$message = "$c_uri (dct:subject of $uri) must be "
	. "a skos:Concept with one skos:notation.";
	}
	} else {
	$message = "$uri must have only one subject";
	}
	} else {
	next; # ignore all other resources
	}

	if ( $message ) {
	print STDERR $message . "\n";
	}
	}

	###
	### For now we only want one collection per concept
	###

	while ( my ($notation, $c) = each %$collections ) {
	if (@$c != 1) {
	print STDERR "$notation should only have one collection.\n";
	next;
	}
	# We will add 'foaf:page' in a later version
	$concepts->{$notation}->{extent} = $c->[0]->{extent};
	}

	###
	### Now we have a nice hashref $concepts to validate
	###

	while ( my ($notation, $c) = each %$concepts ) {
	next unless $c->{broader};
	push @{ $concepts->{ $c->{broader} }->{narrower} }, $notation;
	}

	sub traverse {
	my $c = $concepts->{$_[0]};
	if ( $c->{visited} ) {
	die "there is a loop or diamond in your 'tree'";
	exit;
	}
	$c->{visited} = 1;
	$c->{size} = $c->{extent} unless $c->{size};
	return unless $c->{narrower};
	foreach my $n (@{$c->{narrower}}) {
	traverse( $n );
	$c->{size} += $concepts->{$n}->{size} if ($concepts->{$n}->{size});
	}
	}

	traverse( $_ ) for @top_concepts;

	delete $_->{visited} for values %$concepts;

	my $csv = Text::CSV->new({binary => 1,always_quote=>1});

	# notation, path, level
	sub print_stat {
	my $n = shift;
	my $path = shift \|\| "";
	my $level = shift \|\| 1;

	my $c = $concepts->{$n};

	my @row = ($level, $path, $n, $c->{label}, $c->{extent}, $c->{size} );

	$csv->print( $output, \@row );
	print $output "\n";

	$path = "$path > " if $path;
	foreach my $narrower (@{$c->{narrower}}) {
	print_stat ( $narrower, "$path$n", $level+1 );
	}
	}

	$csv->print( $output, [qw(level path notation label)] );
	print $output "\n";

	foreach my $n (sort @top_concepts) {
	print_stat( $n );
	}

	=head1 COPYRIGHT

	Copyright (c) 2010 Jakob Voss. All rights reserved. This program is
	free software; you can redistribute it and/or modify it under the
	same terms as Perl itself.

	=cut