llamasoft/wordnet_parse.pl

## wordnet_parse.pl
#!/usr/bin/perl

use strict;
use warnings;

# Extract the words listed on one synset record line.
#
# Going by the field order this script relies on, a record is a run of
# single-space separated fields: an 8-digit byte offset, a file number,
# a part of speech, a hexadecimal word count, and then one
# "word lexical-ID" pair per word.  (This looks like the WordNet
# data.* layout -- confirm against the wndb documentation.)
#
# Parameters:
#   $line - one line of input; a trailing newline is allowed.
#
# Returns the list of words in record order.  In each word every
# underscore is replaced by a space, and the text from the first "("
# through the last ")" is removed.  Returns an empty list when the line
# does not start with an 8-digit offset followed by whitespace, or when
# the word-count field is missing or not hexadecimal.
sub synset_words {
    my ($line) = @_;

    # Anything without a leading 8-digit byte offset is not a record.
    return if $line !~ /^[0-9]{8}\s/;
    chomp($line);

    # The first three fields (byte offset, file number, part of speech)
    # are not needed for the word list.
    my ( undef, undef, undef, $count_field, @fields ) = split(/ /, $line);

    # A record cut off before the word count, or one carrying a
    # non-hexadecimal count, cannot be interpreted; skip it instead of
    # letting hex() guess.
    return if !defined $count_field;
    return if $count_field !~ /\A[0-9A-Fa-f]+\z/;

    my $word_count = hex($count_field);
    my @words;
    for ( 1 .. $word_count ) {
        # The count may promise more words than the line really holds
        # (truncated record).  Stop at the last word present rather than
        # producing undefined entries.
        last if !@fields;

        my $word = shift(@fields);
        shift(@fields);    # Lexical ID, unused

        $word =~ tr/_/ /;
        $word =~ s/\(.*\)//;
        push @words, $word;
    }

    return @words;
}

# Print every word of every record found on the input (the files named
# on the command line, or STDIN when none are given), one word per line.
#
# The input is consumed in exactly one pass.  Do not add a second "<>"
# loop after this one: once "<>" has reported end of input, reading it
# again makes Perl fall back to STDIN, so the script would sit waiting
# for input that never comes.
while (my $line = <>) {
    for my $word ( synset_words($line) ) {
        print $word, "\n";
    }
}