Skip to content

Instantly share code, notes, and snippets.

@sevaa
Last active June 5, 2024 18:14
JSON_VALIDATE.sql
create or alter function dbo.JSON_VALIDATE(
@doc nvarchar(max), -- JSON document to validate
@schema nvarchar(max), -- JSON schema to validate against
-- Pass nulls, those parameters are for recursive invokations
@res1 bit = null,
@res2 bit = null,
@schema_path nvarchar(max) = null,
@type tinyint = null,
@root nvarchar(max) = null)
returns @t table(
schema_path nvarchar(max) not null,
error nvarchar(max) not null)
as
begin
-- References:
-- https://json-schema.org/draft/2020-12/json-schema-core
-- https://json-schema.org/draft/2020-12/json-schema-validation for array/object contents keywords
-- Scalar schemata true an false are not supported
if @schema_path is null -- Topmost invokation
begin
if isjson(@schema) = 0
begin
insert into @t values('', 'The schema is not a valid JSON')
return
end
-- isjson() in MSSQL before 2022 only returns 1 for arrays and objects
if isjson(@doc) = 0 --Top level scalars
begin
if isjson('['+@doc+']') = 0 -- Not a JSON container and not a JSON scalar either
begin
insert into @t values('', 'The document is not a valid JSON')
return
end
end
select top 1 @type = [type] from openjson('[' + @doc + ']')
-- Special processing for top level strings - extract them, so that @doc looks like a value from openjson()
if @type = 1
set @doc = json_value('[' + @doc + ']', '$[0]')
-- Schema path at the document root
set @schema_path = isnull(json_value(@schema, '$.id'), '') + '#'
end
-- Now validate in earnest
declare @ref nvarchar(max) = json_value(@schema, '$."$ref"')
if @ref is not null
begin
-- TODO: fail on full URL refs
-- Are relative refs possible?
set @schema_path = @ref
set @schema = json_query(@root, '$' + replace(@ref, '/', '.'))
end
declare @sc_type varchar(20), @sc_properties nvarchar(max), @sc_required nvarchar(max)
declare @sc_additionalProperties bit, @sc_items nvarchar(max), @sc_anyOf nvarchar(max)
declare @sc_allOf nvarchar(max), @sc_oneOf nvarchar(max), @sc_not nvarchar(max)
declare @sc_if nvarchar(max), @sc_then nvarchar(max), @sc_else nvarchar(max)
declare @sc_enum nvarchar(max), @sc_const nvarchar(max)
declare @sc_minItems int, @sc_maxItems int, @sc_uniqueItems bit, @sc_contains nvarchar(max)
declare @sc_minContains int, @sc_maxContains int, @sc_maxProperties int, @sc_minProperties int
declare @sc_minLength int, @sc_maxLength int
declare @sc_minimum decimal(20, 10), @sc_maximum decimal(20, 10), @sc_multipleOf decimal(20, 10)
declare @sc_exclusiveMinimum decimal(20, 10), @sc_exclusiveMaximum decimal(20, 10)
select top 1
@sc_type = [type],
@sc_properties = [properties],
@sc_additionalProperties = additionalProperties,
@sc_required = [required],
@sc_items = items,
@sc_anyOf = anyOf,
@sc_allOf = allOf,
@sc_oneOf = oneOf,
@sc_not = [not],
@sc_if = [if],
@sc_then = [then],
@sc_else = [else],
@sc_enum = enum,
@sc_contains = [contains],
@sc_minItems = minItems,
@sc_maxItems = maxItems,
@sc_uniqueItems = uniqueItems,
@sc_minContains = minContains,
@sc_maxContains = maxContains,
@sc_maxProperties = maxProperties,
@sc_minProperties = minProperties,
@sc_const = const,
@sc_contains = [contains],
@sc_minLength = minLength,
@sc_maxLength = maxLength,
@sc_minimum = minimum,
@sc_maximum = maximum,
@sc_exclusiveMinimum = exclusiveMinimum,
@sc_exclusiveMaximum = exclusiveMaximum
from openjson(@schema) with (
type varchar(20), -- Can also be an array
properties nvarchar(max) as json,
additionalProperties bit,
[required] nvarchar(max) as json,
items nvarchar(max) as json,
anyOf nvarchar(max) as json,
allOf nvarchar(max) as json,
oneOf nvarchar(max) as json,
[not] nvarchar(max) as json,
[if] nvarchar(max) as json,
[then] nvarchar(max) as json,
[else] nvarchar(max) as json,
enum nvarchar(max) as json,
const nvarchar(max) as json,
[contains] nvarchar(max) as json,
minItems int,
maxItems int,
uniqueItems bit,
minContains int,
maxContains int,
maxProperties int,
minProperties int,
minLength int,
maxLength int,
minimum decimal(20,10),
maximum decimal(20,10),
exclusiveMinimum decimal(20,10),
exclusiveMaximum decimal(20,10))
---------------------------- Check type. Mismatch terminates further processing.
declare @type_desc nvarchar(20) = case @type
when 0 then 'null'
when 1 then 'string'
when 2 then 'number'
when 3 then 'boolean'
when 4 then 'array'
when 5 then 'object' end
if json_query(@schema, '$.type') is not null -- Type is an array
begin
-- TODO: integer as distinct from number
if @type_desc not in (select value from openjson(@schema, '$.type'))
begin
insert into @t values(@schema_path + '/type', 'Expected ' + (select string_agg(value, ', ') from openjson(@schema, '$.type')) + ', found ' + isnull(@type_desc, '?'))
return
end
end
else -- Type is a single string
begin
if @sc_type is not null
begin
if @type_desc <> @sc_type
begin
-- Number in JSON, integer in schema
if @type = 2 and @sc_type = 'integer' and cast(@doc as decimal(20, 10)) % 1 <> 0
begin
insert into @t values(@schema_path + '/type', 'Expected an integer, found ' + isnull(@doc, '?'))
end
else
begin
insert into @t values(@schema_path + '/type', 'Expected ' + isnull(@sc_type, '?') + ', found ' + isnull(@type_desc, '?'))
end
return
end
end
end
-- Blank type a possibility. No type, no check.
---------------------------- Objects: recurse down properties
if @type = 5
begin
declare @fields table(field nvarchar(max) collate Latin1_General_BIN2,
value nvarchar(max) collate Latin1_General_BIN2,
datatype tinyint,
sc_property nvarchar(max))
insert into @fields
select f.[key], f.value, f.type, p.value
from openjson(@doc) f
left join openjson(@sc_properties) p on f.[key] = p.[key]
-- Is it a closed schema?
if isnull(@sc_additionalProperties, 1) = 0
begin
insert into @t
select @schema_path + '/additionalProperties', 'Unknown property: ' + field + ': ' + iif(datatype in (4,5), '[collection]', value)
from @fields
where sc_property is null
end
-- Is there a required set of fields?
if @sc_required is not null
begin
insert into @t
select @schema_path + '/required', 'Missing required property: ' + r.[value]
from openjson(@sc_required) r
left join @fields on r.[value] = field
where field is null
end
-- minProperties
if @sc_minProperties is not null and @sc_minProperties > (select count(*) from openjson(@doc))
begin
insert into @t values(@schema_path + '/minProperties', 'Not enough properties, expected at least ' + cast(@sc_minProperties as varchar))
end
-- maxProperties
if @sc_maxProperties is not null and @sc_maxProperties < (select count(*) from openjson(@doc))
begin
insert into @t values(@schema_path + '/maxProperties', 'Too many properties, expected at most ' + cast(@sc_maxProperties as varchar))
end
--TODO: dependentRequired
--TODO: patternProperties, but no regex
--TODO: propertyNames
-- Validate each element against its schema, where known
insert into @t
select schema_path, error
from @fields f
cross apply dbo.JSON_VALIDATE(value, sc_property, null, null, @schema_path + '/properties/' + field, datatype, @root)
where sc_property is not null
end
--------------------------- Arrays
if @type = 4
begin
-- Validate item count
if @sc_minItems is not null and @sc_minItems > 0 and @sc_minItems > (select count([key]) from openjson(@doc))
begin
insert into @t values(@schema_path + '/minItems', 'Not enough items, expected at least ' + cast(@sc_minItems as varchar))
end
if @sc_maxItems is not null and @sc_maxItems > 0 and @sc_maxItems < (select count([key]) from openjson(@doc))
begin
insert into @t values(@schema_path + '/maxItems', 'Too many items, expected at most ' + cast(@sc_maxItems as varchar))
end
-- Validate item uniqueness
if isnull(@sc_uniqueItems, 0) <> 0 and exists(select count(*) as n from openjson(@doc) group by value having count(*) > 1)
begin
insert into @t values(@schema_path + '/uniqueItems', 'Items are not unique.')
end
--TODO: maxContains, minContains together with contains
--TOTO: prefixItems
-- Validate each item
if @sc_items is not null
begin
insert into @t
select schema_path, error
from openjson(@doc)
cross apply dbo.JSON_VALIDATE(value, @sc_items, null, null, @schema_path + '/items', type, @root)
end
end
--------------------------- Strings
if @type = 1
begin
-- minLength
if @sc_minLength is not null and datalength(@doc) < @sc_minLength
begin
insert into @t values(@schema_path + '/minLength', 'Too short, expected ' + cast(@sc_minLength as varchar(20)))
end
-- maxLength
if @sc_maxLength is not null and datalength(@doc) > @sc_maxLength
begin
insert into @t values(@schema_path + '/maxLength', 'Too long, expected ' + cast(@sc_minLength as varchar(20)))
end
-- TODO: pattern
-- Pattern for strings can't be done - as of the time of this writing,
-- there is no regex in SQL server without privileged operations.
end
--------------------------- Numbers
if @type = 2
begin
-- Fractional datatype pulled out of thin air :(
declare @nvalue int = cast(@doc as decimal(20,10))
-- minimum
if @sc_minimum is not null and @nvalue < @sc_minimum
begin
insert into @t values(@schema_path + '/minimum', 'Too small, expected at least ' + cast(@sc_minimum as varchar(30)))
end
-- maximum
if @sc_maximum is not null and @nvalue > @sc_maximum
begin
insert into @t values(@schema_path + '/maximum', 'Too large, expected at most ' + cast(@sc_minimum as varchar(30)))
end
-- exclusiveMinimum
if @sc_exclusiveMinimum is not null and @nvalue <= @sc_exclusiveMinimum
begin
insert into @t values(@schema_path + '/exclusiveMinimum', 'Too small, expected greater than ' + cast(@sc_exclusiveMinimum as varchar(30)))
end
-- exclusiveMaximum
if @sc_exclusiveMaximum is not null and @nvalue >= @sc_exclusiveMaximum
begin
insert into @t values(@schema_path + '/exclusiveMaximum', 'Too large, expected less than ' + cast(@sc_exclusiveMinimum as varchar(30)))
end
-- multipleOf
if @sc_multipleOf is not null and @nvalue % @sc_multipleOf <> 0
begin
insert into @t values(@schema_path + '/multipleOf', 'Expected a multiple of ' + cast(@sc_multipleOf as varchar(30)))
end
end
------------------------------------- Check enum
-- If comparing nontrivial nested JSON, assuming it is canonical; whitespace might get in the way.
if @sc_enum is not null and not exists(select [key]
from openjson(@sc_enum)
where value = @doc)
begin
insert into @t values(@schema_path + '/enum', 'Unknown enum value: ' + isnull(@doc, ''))
end
------------------------------------- Check const
-- If comparing nontrivial nested JSON, assuming it is canonical; whitespace might get in the way.
if @sc_const is not null and @doc <> @sc_const
begin
insert into @t values(@schema_path + '/const', 'Expected a const value of ' + @sc_const + ', got ' + isnull(@doc, ''))
end
------------------------------------- Check anyOf
if @sc_anyOf is not null and not exists(select [key]
from openjson(@sc_anyOf) ao
where not exists (select * from dbo.JSON_VALIDATE(@doc, value, null, null, @schema_path + '/enyOf', @type, @root)))
begin
insert into @t values(@schema_path + '/anyOf', 'Does not match any schema in anyOf')
end
------------------------------------- Check allOf
if @sc_allOf is not null and exists(select [key]
from openjson(@sc_allOf) ao
where exists (select * from dbo.JSON_VALIDATE(@doc, value, null, null, @schema_path + '/allOf', @type, @root)))
begin
insert into @t values(@schema_path + '/allOf', 'Does not match every schema in allOf')
end
------------------------------------- Check oneOf
if @sc_oneOf is not null and 1 <> (select count([key])
from openjson(@sc_allOf) ao
where not exists (select * from dbo.JSON_VALIDATE(@doc, value, null, null, @schema_path + '/oneOf', @type, @root)))
begin
insert into @t values(@schema_path + '/oneOf', 'Does not match exactly one schema in oneOf')
end
------------------------------------- Check not
if @sc_not is not null and 0 = (select count(*)
from dbo.JSON_VALIDATE(@doc, @sc_not, null, null, @schema_path + '/not', @type, @root))
begin
insert into @t values(@schema_path + '/not', 'Matches the schema under not')
end
------------------------------------- If/then/else
if @sc_if is not null
begin
if @sc_then is not null and
0 = (select count(*) from dbo.JSON_VALIDATE(@doc, @sc_if, null, null, @schema_path + '/if', @type, @root))
begin
insert into @t
select @schema_path, error
from dbo.JSON_VALIDATE(@doc, @sc_then, null, null, @schema_path + '/then', @type, @root)
end
if @sc_else is not null and
0 <> (select count(*) from dbo.JSON_VALIDATE(@doc, @sc_if, null, null, @schema_path + '/if', @type, @root))
begin
insert into @t
select @schema_path, error
from dbo.JSON_VALIDATE(@doc, @sc_else, null, null, @schema_path + '/else', @type, @root)
end
end
-- TODO: dependentSchemas
return
end
go
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment