|
#!/usr/bin/perl -w |
|
|
|
use strict; |
|
use JSON; |
|
|
|
package FrictionlessData; |
|
|
|
our $VERSION = "0.1"; |
|
|
|
my @errors; |
|
|
|
sub new { |
|
### What properties do we need to capture here? |
|
|
|
my $class = shift; |
|
my $self = {}; |
|
bless $self, $class; |
|
|
|
return $self; |
|
} |
|
|
|
sub validate { |
|
|
|
my ( $self, $datapackage, $schema ) = @_; |
|
if ( defined $schema ) { |
|
|
|
############################################################################################ |
|
# The schema conversion should be in two functions: |
|
# - required_fields |
|
# |
|
# return an array of field names which must be present or the document isn't valid |
|
# |
|
# - field_properties |
|
# |
|
# return a hash of properties and their values |
|
# |
|
############################################################################################ |
|
|
|
my $ds = convert_json_to_perl($self, $schema); |
|
|
|
my @required; # An array of field names for required properties - simple check, so if it's invalid, it can reject quickly |
|
my %properties; # A hash of field names and validation criteria - fieldname_[anyOf/oneOf_]_property : value / array / hash |
|
if ($ds) { # else returns an error message explaining that the schema is missing |
|
@required = $self->required_fields($ds); |
|
|
|
%properties = $self->field_properties($ds); |
|
} else { |
|
push @errors, "The Schema is not valid JSON"; |
|
} |
|
|
|
|
|
if ( defined $datapackage ) { |
|
my $dp = $self->convert_json_to_perl($datapackage); |
|
|
|
if ($dp) { |
|
my @datapackage_keys = keys %$dp; |
|
foreach my $rq ( @required ) { |
|
next if exists $$dp{$rq}; |
|
push @errors, "Missing required field \"$rq\""; |
|
} |
|
################################################################################################# |
|
# |
|
# This will be where we match the data package against the schema |
|
# |
|
################################################################################################# |
|
|
|
my @property_keys = keys %properties; |
|
|
|
foreach my $datapackage_key (@datapackage_keys) { |
|
foreach my $property_key ( @property_keys ) { |
|
if ( $property_key =~ /^$datapackage_key/ ){ |
|
my $validate_result = $self->validate_field( $datapackage_key, $$dp{$datapackage_key}, $property_key, $properties{$property_key} ); |
|
} |
|
} |
|
|
|
} |
|
} else { |
|
push @errors, "The Data Package is not valid JSON"; |
|
} |
|
} else { |
|
push @errors, "Please enter a file to validate."; |
|
} |
|
} else { |
|
push @errors, "Please enter a schema."; |
|
} |
|
|
|
if (scalar @errors > 0) { |
|
foreach my $err (@errors) { |
|
print "$err\n"; |
|
} |
|
return 0; |
|
} else { |
|
return 1; |
|
# print "The data package is valid\n"; |
|
} |
|
} |
|
|
|
sub convert_json_to_perl { |
|
my ($self, $json) = @_; |
|
|
|
my $json_parser = JSON->new; # $json_parser is the JSON parser object that we use to convert the JSON file to a perl data structure |
|
|
|
################################################################# |
|
# # |
|
# needs more work here on extracting a URI as well as from file # |
|
# # |
|
################################################################# |
|
|
|
open(JSONFILE, "$json") || die $!; |
|
##### Convert the json to a perl data structure |
|
my $perl_data = join( "", <JSONFILE>); #perl_data is the data package schema converted from a file to a string |
|
close(JSONFILE); |
|
|
|
$perl_data = eval{ $json_parser->decode( $perl_data ) }; # $perl_data is the json string converted into a perl data structure |
|
|
|
return $perl_data; |
|
} |
|
|
|
sub required_fields { |
|
##################################################################################### |
|
# # |
|
# returns an array of field names which must be present or the document isn't valid # |
|
# # |
|
##################################################################################### |
|
my ($self, $schema) = @_; |
|
|
|
my @required; |
|
|
|
foreach my $schemakey ( keys %$schema ) { |
|
next unless $schemakey eq "required"; |
|
my $required_fields = $$schema{$schemakey}; |
|
foreach my $field (@$required_fields) { |
|
push @required, $field; |
|
} |
|
} |
|
|
|
return @required; |
|
|
|
} |
|
|
|
sub field_properties { |
|
################################################# |
|
# # |
|
# returns a hash of properties and their values # |
|
# # |
|
################################################# |
|
|
|
my ($self, $schema) = @_; |
|
my %properties; |
|
|
|
foreach my $schemakey (keys %$schema) { |
|
if ( $schemakey eq "properties" ) { |
|
my $properties = $$schema{$schemakey}; |
|
|
|
# The definitions of the properties are held in a separate file - this just checks that they are all referencing the same file |
|
my %reffiles; |
|
foreach my $property ( keys %$properties ) { |
|
my $property_definitions = $$properties{$property}; |
|
foreach my $definition ( keys %$property_definitions ) { |
|
if ( $definition eq '$ref' ) { |
|
my $ref = $$property_definitions{$definition}; |
|
my @refs = split( '#', $ref ); |
|
push @{$reffiles{$refs[0]}}, $refs[1]; |
|
} else { |
|
next if $definition eq "title"; |
|
next if $definition eq "description"; |
|
next if $definition eq "propertyOrder"; |
|
next if $definition eq "format"; |
|
my $deftype = $$property_definitions{$definition}; |
|
$properties{$property."_".$definition} = $deftype; |
|
} |
|
} |
|
} |
|
foreach my $reffile ( keys %reffiles ) { |
|
my $rf = $self->convert_json_to_perl( $reffile); |
|
if ($rf) { |
|
my @defs = @{$reffiles{$reffile}}; |
|
my %definitions_container; # Here, we're just getting to the actual list of definitions. There's probably only one item in this hash - "define" with the value being the hash of definitions. |
|
foreach my $def (@defs) { |
|
my @defpath = split('/', $def); |
|
$definitions_container{$defpath[1]} = $$rf{$defpath[1]}; |
|
} |
|
foreach my $definition ( keys %definitions_container) { |
|
my $field_definitions = $definitions_container{$definition}; #Here we get the field definitions from the container. |
|
foreach my $field_name (keys %$field_definitions) { |
|
my $field_properties = $$field_definitions{$field_name}; |
|
foreach my $field_property (keys %$field_properties) { |
|
next if $field_property eq "title"; |
|
next if $field_property eq "description"; |
|
my $property_value = $$field_properties{$field_property}; |
|
$properties{$field_name."_".$field_property} = $property_value; |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
return %properties; |
|
|
|
|
|
} |
|
|
|
sub validate_field { |
|
|
|
my ( $self, $field_name, $field_value, $property_name, $properties ) = @_; |
|
|
|
if ( ref($properties) eq "ARRAY" ) { |
|
my @properties = split("_", $property_name); |
|
my $property = $properties[1]; |
|
if ( $property eq "oneOf" ) { |
|
return one_of( $field_name, $field_value, $property_name, $properties ); |
|
} elsif ( $property eq "anyOf" ) { |
|
return any_of( $field_name, $field_value, $property_name, $properties ); |
|
} else { |
|
return 0; |
|
} |
|
} elsif ( ref($properties) eq "HASH" ) { |
|
push @errors, "HASH types don't have a validation script yet."; |
|
return 0; |
|
} else { |
|
my @properties = split("_", $property_name); |
|
my $property = $properties[1]; |
|
if ( $property eq "type" ) { |
|
return $self->is_type($properties, $field_value); |
|
} elsif ( $property eq "minItems" ) { |
|
return $self->has_min_items( $properties, $field_value ); |
|
} elsif ( $property eq "pattern" ) { |
|
return $self->matches_pattern( $properties, $field_value ); |
|
} else { |
|
push @errors, "This property ($property) doesn't have a validation script yet."; |
|
return 0; |
|
} |
|
} |
|
|
|
} |
|
|
|
sub sub_properties { |
|
my ( $self, $property_name, @subarray ) = @_; |
|
|
|
my $properties; |
|
|
|
foreach my $definitions ( @subarray) { |
|
if ( ref( $definitions ) eq "HASH" ) { |
|
foreach my $field_name (keys %$definitions) { |
|
next if $field_name eq "title"; |
|
next if $field_name eq "description"; |
|
push @{$$properties{$property_name . "_". $field_name}}, $$definitions{$field_name}; |
|
} |
|
} |
|
} |
|
|
|
foreach my $k ( keys %$properties ) { |
|
foreach my $i ( @{$$properties{$k}} ) { |
|
next unless ref($i) eq "ARRAY"; |
|
if ( scalar @$i == 1 ) { |
|
$$properties{$k} = @$i[0]; |
|
} |
|
} |
|
} |
|
|
|
return $properties; |
|
} |
|
|
|
sub is_type { |
|
|
|
my ( $self, $type, $value ) = @_; |
|
|
|
if ( $type eq "array" ) { |
|
if ( ref($value) eq "ARRAY" ) { |
|
return 1; |
|
} else { |
|
push @errors, "Type should be ARRAY"; |
|
return 0; |
|
} |
|
} elsif ( $type eq "object" ) { |
|
if ( ref($value) eq "HASH" ) { |
|
return 1; |
|
} else { |
|
push @errors, "Type should be OBJECT"; |
|
return 0; |
|
} |
|
} elsif ( $type eq "integer" ) { |
|
if ( $value =~/^([0-9])+$/ ) { |
|
return 1; |
|
} else { |
|
push @errors, "Type should be INTEGER"; |
|
return 0; |
|
} |
|
} elsif ( $type eq "string" ) { |
|
if ( !ref($value) ) { |
|
return 1; |
|
} else { |
|
push @errors, "Type should be STRING"; |
|
return 0; |
|
} |
|
} else { |
|
push @errors, "Type not tested"; |
|
return 0; |
|
} |
|
|
|
} |
|
|
|
sub has_min_items { |
|
|
|
my ( $self, $number, $value ) = @_; |
|
|
|
if ( scalar @$value >= $number ) { |
|
return 1; |
|
} else { |
|
push @errors, "Does not have the minimum number of items"; |
|
return 0; |
|
} |
|
|
|
} |
|
|
|
sub matches_pattern { |
|
|
|
my ( $self, $pattern, $value ) = @_; |
|
|
|
if ( $value =~ $pattern ) { |
|
return 1; |
|
} else { |
|
push @errors, "Does not match the given pattern"; |
|
return 0; |
|
} |
|
|
|
} |
|
|
|
sub one_of { |
|
|
|
## Check if one and only one of the conditions is met |
|
|
|
my ( $self, $field_name, $field_value, $property_name, $properties, ) = @_; |
|
|
|
my $subproperties = sub_properties( $property_name, @{$properties} ); |
|
|
|
if ( $subproperties ) { |
|
foreach my $subp ( keys %$subproperties ) { |
|
my @subprops = split("_", $subp); |
|
my $subprop = $subprops[-1]; |
|
my $count_true = 0; |
|
foreach my $prop (@$properties) { |
|
foreach my $sp ( keys %$prop ) { |
|
if ( $subprop eq $sp ) { |
|
if ( $sp eq "type" ) { |
|
$count_true += is_type( $$prop{$sp}, $field_value ); |
|
} elsif ( $sp eq "properties" ) { |
|
$count_true += 0; |
|
} elsif ( $sp eq "anyOf" ) { |
|
$count_true += 0; |
|
} |
|
|
|
} |
|
} |
|
} |
|
if ( $count_true == 1 ) { |
|
return 1; |
|
} else { |
|
push @errors, "$field_name does not have oneOf the required items."; |
|
return 0; |
|
} |
|
} |
|
} else { |
|
return 0; |
|
} |
|
} |
|
|
|
sub any_of { |
|
|
|
## Check if at least one of the conditions is met |
|
|
|
my ( $self, $field_name, $field_value, $property_name, $properties, ) = @_; |
|
|
|
my $subproperties = sub_properties( $property_name, @{$properties} ); |
|
|
|
if ( $subproperties ) { |
|
foreach my $subp ( keys %$subproperties ) { |
|
my @subprops = split("_", $subp); |
|
my $subprop = $subprops[-1]; |
|
my $count_true = 0; |
|
foreach my $prop (@$properties) { |
|
foreach my $sp ( keys %$prop ) { |
|
if ( $subprop eq $sp ) { |
|
if ( $sp eq "type" ) { |
|
$count_true += is_type( $$prop{$sp}, $field_value ); |
|
} elsif ( $sp eq "properties" ) { |
|
$count_true += 0; |
|
} elsif ( $sp eq "anyOf" ) { |
|
$count_true += 0; |
|
} |
|
|
|
} |
|
} |
|
} |
|
if ( $count_true >= 1 ) { |
|
return 1; |
|
} else { |
|
push @errors, "$field_name does not have anyOf the required items."; |
|
return 0; |
|
} |
|
} |
|
} else { |
|
push @errors, "$field_name does not have any sub-properties to test"; |
|
return 0; |
|
} |
|
} |
|
|
|
sub all_of { |
|
|
|
my ( $self, $pattern, $value ) = @_; |
|
|
|
## Check if all of the conditions are met |
|
|
|
} |
|
|
|
sub none_of { |
|
|
|
my ( $self, $pattern, $value ) = @_; |
|
|
|
## Check that none of the conditions is met |
|
|
|
} |
|
|
|
|
|
|
|
|
|
1; |
|
|
|
__END__ |
|
|
|
=head1 NAME |
|
|
|
FrictionlessData |
|
|
|
=head1 SYNOPSIS |
|
|
|
(example of code) |
|
|
|
=head1 DESCRIPTION |
|
|
|
A module for interacting with data packages made according to the OKF standard L<http://frictionlessdata.io/> |
|
|
|
=head1 LICENSE |
|
|
|
The MIT License (MIT) |
|
|
|
Copyright (c) 2016 University of Bath |
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy of |
|
this software and associated documentation files (the "Software"), to deal in |
|
the Software without restriction, including without limitation the rights to |
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of |
|
the Software, and to permit persons to whom the Software is furnished to do so, |
|
subject to the following conditions: |
|
|
|
The above copyright notice and this permission notice shall be included in all |
|
copies or substantial portions of the Software. |
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS |
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR |
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER |
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
|
|
|
=head2 Methods |
|
|
|
=over 12 |
|
|
|
=item C<new> |
|
|
|
Returns a new FrictionlessData object. |
|
|
|
=item C<validated> |
|
|
|
Returns a boolean value indicating whether the data package is valid. |
|
|
|
=back |
|
|
|
=cut |