Last active
May 29, 2019 23:12
-
-
Save mlarraz/3e6bef9af50ba090085ca97551c6ec60 to your computer and use it in GitHub Desktop.
avro ruby diff (1.8.2 vs 1.9.0). extracted from https://github.com/apache/avro/compare/release-1.8.2...release-1.9.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
:100644 100644 3f9ddafa 7a61fe1f M lang/ruby/.gitignore | |
:100644 100644 20543da7 17161f52 M lang/ruby/Gemfile | |
:100644 100644 8008c46b 9fc48c2a M lang/ruby/Manifest | |
:100755 100755 8fea5656 a09617c3 M lang/ruby/build.sh | |
:100644 100644 c419ab1e 5a645663 M lang/ruby/lib/avro.rb | |
:100644 100644 c27c2dc1 e4650557 M lang/ruby/lib/avro/data_file.rb | |
:100644 100644 9dc41db4 31107217 M lang/ruby/lib/avro/io.rb | |
:100644 100644 1ac81294 f0816e83 M lang/ruby/lib/avro/ipc.rb | |
:000000 100644 00000000 e1b219d7 A lang/ruby/lib/avro/logical_types.rb | |
:100644 100644 6c210e19 9f509dde M lang/ruby/lib/avro/protocol.rb | |
:100644 100644 87f6fa48 4a766fdc M lang/ruby/lib/avro/schema.rb | |
:000000 100644 00000000 37401fc0 A lang/ruby/lib/avro/schema_compatibility.rb | |
:100644 100644 0a5bee5d 1506fcac M lang/ruby/lib/avro/schema_normalization.rb | |
:000000 100644 00000000 f9274f06 A lang/ruby/lib/avro/schema_validator.rb | |
:100644 100644 de2d9c49 a666f338 M lang/ruby/test/case_finder.rb | |
:100644 100644 9d276f7d 54fa8781 M lang/ruby/test/random_data.rb | |
:100644 100644 8fe05d70 0f13df35 M lang/ruby/test/test_datafile.rb | |
:100644 100644 f8f3da13 350d48bc M lang/ruby/test/test_io.rb | |
:000000 100644 00000000 8cd6819b A lang/ruby/test/test_logical_types.rb | |
:100644 100644 fda38824 1b036fe5 M lang/ruby/test/test_protocol.rb | |
:100644 100644 0668cf2f ed1c8eef M lang/ruby/test/test_schema.rb | |
:000000 100644 00000000 4ec60ef7 A lang/ruby/test/test_schema_compatibility.rb | |
:000000 100644 00000000 0126c354 A lang/ruby/test/test_schema_validator.rb | |
:100644 100644 111b9d77 1a1f12ee M lang/ruby/test/tool.rb | |
diff --git a/lang/ruby/.gitignore b/lang/ruby/.gitignore | |
index 3f9ddafa..7a61fe1f 100644 | |
--- a/lang/ruby/.gitignore | |
+++ b/lang/ruby/.gitignore | |
@@ -1,3 +1,9 @@ | |
tmp | |
data.avr | |
Gemfile.lock | |
+.bundle/ | |
+.gem/ | |
+avro.gemspec | |
+pkg/ | |
+.ruby-version | |
+.ruby-gemset | |
diff --git a/lang/ruby/Gemfile b/lang/ruby/Gemfile | |
index 20543da7..17161f52 100644 | |
--- a/lang/ruby/Gemfile | |
+++ b/lang/ruby/Gemfile | |
@@ -18,3 +18,4 @@ gem 'rake' | |
gem 'echoe' | |
gem 'multi_json' | |
gem 'snappy' | |
+gem 'test-unit' | |
diff --git a/lang/ruby/Manifest b/lang/ruby/Manifest | |
index 8008c46b..9fc48c2a 100644 | |
--- a/lang/ruby/Manifest | |
+++ b/lang/ruby/Manifest | |
@@ -9,16 +9,27 @@ lib/avro.rb | |
lib/avro/data_file.rb | |
lib/avro/io.rb | |
lib/avro/ipc.rb | |
+lib/avro/logical_types.rb | |
lib/avro/protocol.rb | |
lib/avro/schema.rb | |
+lib/avro/schema_compatibility.rb | |
+lib/avro/schema_normalization.rb | |
+lib/avro/schema_validator.rb | |
+test/case_finder.rb | |
test/random_data.rb | |
test/sample_ipc_client.rb | |
test/sample_ipc_http_client.rb | |
test/sample_ipc_http_server.rb | |
test/sample_ipc_server.rb | |
test/test_datafile.rb | |
+test/test_fingerprints.rb | |
test/test_help.rb | |
test/test_io.rb | |
+test/test_logical_types.rb | |
test/test_protocol.rb | |
+test/test_schema.rb | |
+test/test_schema_compatibility.rb | |
+test/test_schema_normalization.rb | |
+test/test_schema_validator.rb | |
test/test_socket_transport.rb | |
test/tool.rb | |
diff --git a/lang/ruby/build.sh b/lang/ruby/build.sh | |
index 8fea5656..a09617c3 100755 | |
--- a/lang/ruby/build.sh | |
+++ b/lang/ruby/build.sh | |
@@ -25,11 +25,11 @@ export GEM_HOME=.gem/ | |
export PATH="$PATH:.gem/bin" | |
# bootstrap bundler | |
-gem install --no-rdoc --no-ri bundler | |
+gem install --no-document -v 1.17.3 bundler | |
+bundle install | |
case "$1" in | |
test) | |
- bundle install | |
bundle exec rake test | |
;; | |
@@ -39,6 +39,7 @@ case "$1" in | |
clean) | |
bundle exec rake clean | |
+ rm -rf tmp avro.gemspec data.avr | |
;; | |
*) | |
diff --git a/lang/ruby/lib/avro.rb b/lang/ruby/lib/avro.rb | |
index c419ab1e..5a645663 100644 | |
--- a/lang/ruby/lib/avro.rb | |
+++ b/lang/ruby/lib/avro.rb | |
@@ -32,6 +32,15 @@ module Avro | |
super(msg) | |
end | |
end | |
+ | |
+ class << self | |
+ attr_writer :disable_field_default_validation | |
+ | |
+ def disable_field_default_validation | |
+ @disable_field_default_validation ||= | |
+ ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != '' | |
+ end | |
+ end | |
end | |
require 'avro/schema' | |
@@ -40,3 +49,5 @@ require 'avro/data_file' | |
require 'avro/protocol' | |
require 'avro/ipc' | |
require 'avro/schema_normalization' | |
+require 'avro/schema_validator' | |
+require 'avro/schema_compatibility' | |
diff --git a/lang/ruby/lib/avro/data_file.rb b/lang/ruby/lib/avro/data_file.rb | |
index c27c2dc1..e4650557 100644 | |
--- a/lang/ruby/lib/avro/data_file.rb | |
+++ b/lang/ruby/lib/avro/data_file.rb | |
@@ -338,12 +338,29 @@ module Avro | |
def decompress(data) | |
load_snappy! | |
+ crc32 = data.slice(-4..-1).unpack('N').first | |
+ uncompressed = Snappy.inflate(data.slice(0..-5)) | |
+ | |
+ if crc32 == Zlib.crc32(uncompressed) | |
+ uncompressed | |
+ else | |
+ # older versions of avro-ruby didn't write the checksum, so if it | |
+ # doesn't match this must assume that it wasn't there and return | |
+ # the entire payload uncompressed. | |
+ Snappy.inflate(data) | |
+ end | |
+ rescue Snappy::Error | |
+ # older versions of avro-ruby didn't write the checksum, so removing | |
+ # the last 4 bytes may cause Snappy to fail. recover by assuming the | |
+ # payload is from an older file and uncompress the entire buffer. | |
Snappy.inflate(data) | |
end | |
def compress(data) | |
load_snappy! | |
- Snappy.deflate(data) | |
+ crc32 = Zlib.crc32(data) | |
+ compressed = Snappy.deflate(data) | |
+ [compressed, crc32].pack('a*N') | |
end | |
private | |
diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb | |
index 9dc41db4..31107217 100644 | |
--- a/lang/ruby/lib/avro/io.rb | |
+++ b/lang/ruby/lib/avro/io.rb | |
@@ -5,9 +5,9 @@ | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
-# | |
+# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
-# | |
+# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
@@ -43,9 +43,9 @@ module Avro | |
end | |
def byte! | |
- @reader.read(1).unpack('C').first | |
+ @reader.readbyte | |
end | |
- | |
+ | |
def read_null | |
# null is written as zero byte's | |
nil | |
@@ -76,7 +76,7 @@ module Avro | |
# The float is converted into a 32-bit integer using a method | |
# equivalent to Java's floatToIntBits and then encoded in | |
# little-endian format. | |
- @reader.read(4).unpack('e')[0] | |
+ read_and_unpack(4, 'e'.freeze) | |
end | |
def read_double | |
@@ -84,7 +84,7 @@ module Avro | |
# The double is converted into a 64-bit integer using a method | |
# equivalent to Java's doubleToLongBits and then encoded in | |
# little-endian format. | |
- @reader.read(8).unpack('E')[0] | |
+ read_and_unpack(8, 'E'.freeze) | |
end | |
def read_bytes | |
@@ -97,7 +97,7 @@ module Avro | |
# A string is encoded as a long followed by that many bytes of | |
# UTF-8 encoded character data. | |
read_bytes.tap do |string| | |
- string.force_encoding("UTF-8") if string.respond_to? :force_encoding | |
+ string.force_encoding('UTF-8'.freeze) if string.respond_to? :force_encoding | |
end | |
end | |
@@ -144,6 +144,23 @@ module Avro | |
def skip(n) | |
reader.seek(reader.tell() + n) | |
end | |
+ | |
+ private | |
+ | |
+ # Optimize unpacking strings when `unpack1` is available (ruby >= 2.4) | |
+ if String.instance_methods.include?(:unpack1) | |
+ | |
+ def read_and_unpack(byte_count, format) | |
+ @reader.read(byte_count).unpack1(format) | |
+ end | |
+ | |
+ else | |
+ | |
+ def read_and_unpack(byte_count, format) | |
+ @reader.read(byte_count).unpack(format)[0] | |
+ end | |
+ | |
+ end | |
end | |
# Write leaf values | |
@@ -159,7 +176,7 @@ module Avro | |
nil | |
end | |
- # a boolean is written as a single byte | |
+ # a boolean is written as a single byte | |
# whose value is either 0 (false) or 1 (true). | |
def write_boolean(datum) | |
on_disk = datum ? 1.chr : 0.chr | |
@@ -175,7 +192,6 @@ module Avro | |
# int and long values are written using variable-length, | |
# zig-zag coding. | |
def write_long(n) | |
- foo = n | |
n = (n << 1) ^ (n >> 63) | |
while (n & ~0x7F) != 0 | |
@writer.write(((n & 0x7f) | 0x80).chr) | |
@@ -189,7 +205,7 @@ module Avro | |
# equivalent to Java's floatToIntBits and then encoded in | |
# little-endian format. | |
def write_float(datum) | |
- @writer.write([datum].pack('e')) | |
+ @writer.write([datum].pack('e'.freeze)) | |
end | |
# A double is written as 8 bytes. | |
@@ -197,7 +213,7 @@ module Avro | |
# equivalent to Java's doubleToLongBits and then encoded in | |
# little-endian format. | |
def write_double(datum) | |
- @writer.write([datum].pack('E')) | |
+ @writer.write([datum].pack('E'.freeze)) | |
end | |
# Bytes are encoded as a long followed by that many bytes of data. | |
@@ -209,7 +225,7 @@ module Avro | |
# A string is encoded as a long followed by that many bytes of | |
# UTF-8 encoded character data | |
def write_string(datum) | |
- # FIXME utf-8 encode this in 1.9 | |
+ datum = datum.encode('utf-8'.freeze) if datum.respond_to? :encode | |
write_bytes(datum) | |
end | |
@@ -221,46 +237,7 @@ module Avro | |
class DatumReader | |
def self.match_schemas(writers_schema, readers_schema) | |
- w_type = writers_schema.type_sym | |
- r_type = readers_schema.type_sym | |
- | |
- # This conditional is begging for some OO love. | |
- if w_type == :union || r_type == :union | |
- return true | |
- end | |
- | |
- if w_type == r_type | |
- return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type) | |
- | |
- case r_type | |
- when :record | |
- return writers_schema.fullname == readers_schema.fullname | |
- when :error | |
- return writers_schema.fullname == readers_schema.fullname | |
- when :request | |
- return true | |
- when :fixed | |
- return writers_schema.fullname == readers_schema.fullname && | |
- writers_schema.size == readers_schema.size | |
- when :enum | |
- return writers_schema.fullname == readers_schema.fullname | |
- when :map | |
- return writers_schema.values.type == readers_schema.values.type | |
- when :array | |
- return writers_schema.items.type == readers_schema.items.type | |
- end | |
- end | |
- | |
- # Handle schema promotion | |
- if w_type == :int && [:long, :float, :double].include?(r_type) | |
- return true | |
- elsif w_type == :long && [:float, :double].include?(r_type) | |
- return true | |
- elsif w_type == :float && r_type == :double | |
- return true | |
- end | |
- | |
- return false | |
+ Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema) | |
end | |
attr_accessor :writers_schema, :readers_schema | |
@@ -293,7 +270,7 @@ module Avro | |
# function dispatch for reading data based on type of writer's | |
# schema | |
- case writers_schema.type_sym | |
+ datum = case writers_schema.type_sym | |
when :null; decoder.read_null | |
when :boolean; decoder.read_boolean | |
when :string; decoder.read_string | |
@@ -311,6 +288,8 @@ module Avro | |
else | |
raise AvroError, "Cannot read unknown schema type: #{writers_schema.type}" | |
end | |
+ | |
+ readers_schema.type_adapter.decode(datum) | |
end | |
def read_fixed(writers_schema, readers_schema, decoder) | |
@@ -336,7 +315,7 @@ module Avro | |
while block_count != 0 | |
if block_count < 0 | |
block_count = -block_count | |
- block_size = decoder.read_long | |
+ _block_size = decoder.read_long | |
end | |
block_count.times do | |
read_items << read_data(writers_schema.items, | |
@@ -355,7 +334,7 @@ module Avro | |
while block_count != 0 | |
if block_count < 0 | |
block_count = -block_count | |
- block_size = decoder.read_long | |
+ _block_size = decoder.read_long | |
end | |
block_count.times do | |
key = decoder.read_string | |
@@ -393,11 +372,11 @@ module Avro | |
writers_fields_hash = writers_schema.fields_hash | |
readers_fields_hash.each do |field_name, field| | |
unless writers_fields_hash.has_key? field_name | |
- if !field.default.nil? | |
+ if field.default? | |
field_val = read_default_value(field.type, field.default) | |
read_record[field.name] = field_val | |
else | |
- # FIXME(jmhodges) another 'unset' here | |
+ raise AvroError, "Missing data for #{field.type} with no default" | |
end | |
end | |
end | |
@@ -520,7 +499,7 @@ module Avro | |
if block_count < 0 | |
decoder.skip(decoder.read_long) | |
else | |
- block_count.times &blk | |
+ block_count.times(&blk) | |
end | |
block_count = decoder.read_long | |
end | |
@@ -538,8 +517,10 @@ module Avro | |
write_data(writers_schema, datum, encoder) | |
end | |
- def write_data(writers_schema, datum, encoder) | |
- unless Schema.validate(writers_schema, datum) | |
+ def write_data(writers_schema, logical_datum, encoder) | |
+ datum = writers_schema.type_adapter.encode(logical_datum) | |
+ | |
+ unless Schema.validate(writers_schema, datum, { recursive: false, encoded: true }) | |
raise AvroTypeError.new(writers_schema, datum) | |
end | |
@@ -574,6 +555,7 @@ module Avro | |
end | |
def write_array(writers_schema, datum, encoder) | |
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Array) | |
if datum.size > 0 | |
encoder.write_long(datum.size) | |
datum.each do |item| | |
@@ -584,6 +566,7 @@ module Avro | |
end | |
def write_map(writers_schema, datum, encoder) | |
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash) | |
if datum.size > 0 | |
encoder.write_long(datum.size) | |
datum.each do |k,v| | |
@@ -606,6 +589,7 @@ module Avro | |
end | |
def write_record(writers_schema, datum, encoder) | |
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash) | |
writers_schema.fields.each do |field| | |
write_data(field.type, datum[field.name], encoder) | |
end | |
diff --git a/lang/ruby/lib/avro/ipc.rb b/lang/ruby/lib/avro/ipc.rb | |
index 1ac81294..f0816e83 100644 | |
--- a/lang/ruby/lib/avro/ipc.rb | |
+++ b/lang/ruby/lib/avro/ipc.rb | |
@@ -5,9 +5,9 @@ | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
-# | |
+# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
-# | |
+# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
@@ -74,10 +74,10 @@ module Avro::IPC | |
class ConnectionClosedException < Avro::AvroError; end | |
+ # Base class for the client side of a protocol interaction. | |
class Requestor | |
- """Base class for the client side of a protocol interaction.""" | |
- attr_reader :local_protocol, :transport | |
- attr_accessor :remote_protocol, :remote_hash, :send_protocol | |
+ attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash | |
+ attr_accessor :send_protocol | |
def initialize(local_protocol, transport) | |
@local_protocol = local_protocol | |
@@ -100,7 +100,7 @@ module Avro::IPC | |
def request(message_name, request_datum) | |
# Writes a request message and reads a response or error message. | |
# build handshake and call request | |
- buffer_writer = StringIO.new('', 'w+') | |
+ buffer_writer = StringIO.new(''.force_encoding('BINARY')) | |
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer) | |
write_handshake_request(buffer_encoder) | |
write_call_request(message_name, request_datum, buffer_encoder) | |
@@ -193,9 +193,9 @@ module Avro::IPC | |
# * a one-byte error flag boolean, followed by either: | |
# * if the error flag is false, | |
# the message response, serialized per the message's response schema. | |
- # * if the error flag is true, | |
+ # * if the error flag is true, | |
# the error, serialized per the message's error union schema. | |
- response_metadata = META_READER.read(decoder) | |
+ _response_metadata = META_READER.read(decoder) | |
# remote response schema | |
remote_message_schema = remote_protocol.messages[message_name] | |
@@ -244,7 +244,7 @@ module Avro::IPC | |
# a response or error. Compare to 'handle()' in Thrift. | |
def respond(call_request, transport=nil) | |
buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request)) | |
- buffer_writer = StringIO.new('', 'w+') | |
+ buffer_writer = StringIO.new(''.force_encoding('BINARY')) | |
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer) | |
error = nil | |
response_metadata = {} | |
@@ -257,7 +257,7 @@ module Avro::IPC | |
end | |
# read request using remote protocol | |
- request_metadata = META_READER.read(buffer_decoder) | |
+ _request_metadata = META_READER.read(buffer_decoder) | |
remote_message_name = buffer_decoder.read_string | |
# get remote and local request schemas so we can do | |
@@ -294,6 +294,7 @@ module Avro::IPC | |
end | |
rescue Avro::AvroError => e | |
error = AvroRemoteException.new(e.to_s) | |
+ # TODO does the stuff written here ever get used? | |
buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new) | |
META_WRITER.write(response_metadata, buffer_encoder) | |
buffer_encoder.write_boolean(true) | |
@@ -393,7 +394,7 @@ module Avro::IPC | |
def read_framed_message | |
message = [] | |
loop do | |
- buffer = StringIO.new | |
+ buffer = StringIO.new(''.force_encoding('BINARY')) | |
buffer_length = read_buffer_length | |
if buffer_length == 0 | |
return message.join | |
@@ -410,7 +411,7 @@ module Avro::IPC | |
end | |
def write_framed_message(message) | |
- message_length = message.size | |
+ message_length = message.bytesize | |
total_bytes_sent = 0 | |
while message_length - total_bytes_sent > 0 | |
if message_length - total_bytes_sent > BUFFER_SIZE | |
@@ -426,7 +427,7 @@ module Avro::IPC | |
end | |
def write_buffer(chunk) | |
- buffer_length = chunk.size | |
+ buffer_length = chunk.bytesize | |
write_buffer_length(buffer_length) | |
total_bytes_sent = 0 | |
while total_bytes_sent < buffer_length | |
@@ -467,7 +468,7 @@ module Avro::IPC | |
end | |
def write_framed_message(message) | |
- message_size = message.size | |
+ message_size = message.bytesize | |
total_bytes_sent = 0 | |
while message_size - total_bytes_sent > 0 | |
if message_size - total_bytes_sent > BUFFER_SIZE | |
@@ -485,7 +486,7 @@ module Avro::IPC | |
private | |
def write_buffer(chunk) | |
- buffer_size = chunk.size | |
+ buffer_size = chunk.bytesize | |
write_buffer_size(buffer_size) | |
writer << chunk | |
end | |
@@ -505,13 +506,13 @@ module Avro::IPC | |
def read_framed_message | |
message = [] | |
loop do | |
- buffer = "" | |
+ buffer = ''.force_encoding('BINARY') | |
buffer_size = read_buffer_size | |
return message.join if buffer_size == 0 | |
- while buffer.size < buffer_size | |
- chunk = reader.read(buffer_size - buffer.size) | |
+ while buffer.bytesize < buffer_size | |
+ chunk = reader.read(buffer_size - buffer.bytesize) | |
chunk_error?(chunk) | |
buffer << chunk | |
end | |
@@ -541,7 +542,7 @@ module Avro::IPC | |
end | |
def transceive(message) | |
- writer = FramedWriter.new(StringIO.new) | |
+ writer = FramedWriter.new(StringIO.new(''.force_encoding('BINARY'))) | |
writer.write_framed_message(message) | |
resp = @conn.post('/', writer.to_s, {'Content-Type' => 'avro/binary'}) | |
FramedReader.new(StringIO.new(resp.body)).read_framed_message | |
diff --git a/lang/ruby/lib/avro/logical_types.rb b/lang/ruby/lib/avro/logical_types.rb | |
new file mode 100644 | |
index 00000000..e1b219d7 | |
--- /dev/null | |
+++ b/lang/ruby/lib/avro/logical_types.rb | |
@@ -0,0 +1,90 @@ | |
+# -*- coding: utf-8 -*- | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+ | |
+require 'date' | |
+ | |
+module Avro | |
+ module LogicalTypes | |
+ module IntDate | |
+ EPOCH_START = Date.new(1970, 1, 1) | |
+ | |
+ def self.encode(date) | |
+ return date.to_i if date.is_a?(Numeric) | |
+ | |
+ (date - EPOCH_START).to_i | |
+ end | |
+ | |
+ def self.decode(int) | |
+ EPOCH_START + int | |
+ end | |
+ end | |
+ | |
+ module TimestampMillis | |
+ def self.encode(value) | |
+ return value.to_i if value.is_a?(Numeric) | |
+ | |
+ time = value.to_time | |
+ time.to_i * 1000 + time.usec / 1000 | |
+ end | |
+ | |
+ def self.decode(int) | |
+ s, ms = int / 1000, int % 1000 | |
+ Time.at(s, ms * 1000).utc | |
+ end | |
+ end | |
+ | |
+ module TimestampMicros | |
+ def self.encode(value) | |
+ return value.to_i if value.is_a?(Numeric) | |
+ | |
+ time = value.to_time | |
+ time.to_i * 1000_000 + time.usec | |
+ end | |
+ | |
+ def self.decode(int) | |
+ s, us = int / 1000_000, int % 1000_000 | |
+ Time.at(s, us).utc | |
+ end | |
+ end | |
+ | |
+ module Identity | |
+ def self.encode(datum) | |
+ datum | |
+ end | |
+ | |
+ def self.decode(datum) | |
+ datum | |
+ end | |
+ end | |
+ | |
+ TYPES = { | |
+ "int" => { | |
+ "date" => IntDate | |
+ }, | |
+ "long" => { | |
+ "timestamp-millis" => TimestampMillis, | |
+ "timestamp-micros" => TimestampMicros | |
+ }, | |
+ }.freeze | |
+ | |
+ def self.type_adapter(type, logical_type) | |
+ return unless logical_type | |
+ | |
+ TYPES.fetch(type, {}.freeze).fetch(logical_type, Identity) | |
+ end | |
+ end | |
+end | |
diff --git a/lang/ruby/lib/avro/protocol.rb b/lang/ruby/lib/avro/protocol.rb | |
index 6c210e19..9f509dde 100644 | |
--- a/lang/ruby/lib/avro/protocol.rb | |
+++ b/lang/ruby/lib/avro/protocol.rb | |
@@ -20,7 +20,7 @@ module Avro | |
VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym)) | |
class ProtocolParseError < Avro::AvroError; end | |
- attr_reader :name, :namespace, :types, :messages, :md5 | |
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc | |
def self.parse(protocol_string) | |
json_data = MultiJson.load(protocol_string) | |
@@ -29,13 +29,14 @@ module Avro | |
namespace = json_data['namespace'] | |
types = json_data['types'] | |
messages = json_data['messages'] | |
- Protocol.new(name, namespace, types, messages) | |
+ doc = json_data['doc'] | |
+ Protocol.new(name, namespace, types, messages, doc) | |
else | |
raise ProtocolParseError, "Not a JSON object: #{json_data}" | |
end | |
end | |
- def initialize(name, namespace=nil, types=nil, messages=nil) | |
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil) | |
# Ensure valid ctor args | |
if !name | |
raise ProtocolParseError, 'Protocols must have a non-empty name.' | |
@@ -55,6 +56,7 @@ module Avro | |
@types = parse_types(types, type_names) | |
@messages = parse_messages(messages, type_names) | |
@md5 = Digest::MD5.digest(to_s) | |
+ @doc = doc | |
end | |
def to_s | |
@@ -67,7 +69,6 @@ module Avro | |
private | |
def parse_types(types, type_names) | |
- type_objects = [] | |
types.collect do |type| | |
# FIXME adding type.name to type_names is not defined in the | |
# spec. Possible bug in the python impl and the spec. | |
@@ -92,7 +93,8 @@ module Avro | |
request = body['request'] | |
response = body['response'] | |
errors = body['errors'] | |
- message_objects[name] = Message.new(name, request, response, errors, names, namespace) | |
+ doc = body['doc'] | |
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc) | |
end | |
message_objects | |
end | |
@@ -111,14 +113,15 @@ module Avro | |
end | |
class Message | |
- attr_reader :name, :request, :response, :errors, :default_namespace | |
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc | |
- def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil) | |
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil) | |
@name = name | |
@default_namespace = default_namespace | |
@request = parse_request(request, names) | |
@response = parse_response(response, names) | |
@errors = parse_errors(errors, names) if errors | |
+ @doc = doc | |
end | |
def to_avro(names=Set.new) | |
@@ -127,6 +130,7 @@ module Avro | |
'response' => response.to_avro(names) | |
}.tap do |hash| | |
hash['errors'] = errors.to_avro(names) if errors | |
+ hash['doc'] = @doc if @doc | |
end | |
end | |
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb | |
index 87f6fa48..4a766fdc 100644 | |
--- a/lang/ruby/lib/avro/schema.rb | |
+++ b/lang/ruby/lib/avro/schema.rb | |
@@ -14,6 +14,8 @@ | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
+require 'avro/logical_types' | |
+ | |
module Avro | |
class Schema | |
# Sets of strings, for backwards compatibility. See below for sets of symbols, | |
@@ -40,6 +42,7 @@ module Avro | |
def self.real_parse(json_obj, names=nil, default_namespace=nil) | |
if json_obj.is_a? Hash | |
type = json_obj['type'] | |
+ logical_type = json_obj['logicalType'] | |
raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil? | |
# Check that the type is valid before calling #to_sym, since symbols are never garbage | |
@@ -50,7 +53,7 @@ module Avro | |
type_sym = type.to_sym | |
if PRIMITIVE_TYPES_SYM.include?(type_sym) | |
- return PrimitiveSchema.new(type_sym) | |
+ return PrimitiveSchema.new(type_sym, logical_type) | |
elsif NAMED_TYPES_SYM.include? type_sym | |
name = json_obj['name'] | |
@@ -58,13 +61,15 @@ module Avro | |
case type_sym | |
when :fixed | |
size = json_obj['size'] | |
- return FixedSchema.new(name, namespace, size, names) | |
+ return FixedSchema.new(name, namespace, size, names, logical_type) | |
when :enum | |
symbols = json_obj['symbols'] | |
- return EnumSchema.new(name, namespace, symbols, names) | |
+ doc = json_obj['doc'] | |
+ return EnumSchema.new(name, namespace, symbols, names, doc) | |
when :record, :error | |
fields = json_obj['fields'] | |
- return RecordSchema.new(name, namespace, fields, names, type_sym) | |
+ doc = json_obj['doc'] | |
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc) | |
else | |
raise SchemaParseError.new("Unknown named type: #{type}") | |
end | |
@@ -91,52 +96,29 @@ module Avro | |
end | |
# Determine if a ruby datum is an instance of a schema | |
- def self.validate(expected_schema, datum) | |
- case expected_schema.type_sym | |
- when :null | |
- datum.nil? | |
- when :boolean | |
- datum == true || datum == false | |
- when :string, :bytes | |
- datum.is_a? String | |
- when :int | |
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) && | |
- (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE) | |
- when :long | |
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) && | |
- (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE) | |
- when :float, :double | |
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum) | |
- when :fixed | |
- datum.is_a?(String) && datum.size == expected_schema.size | |
- when :enum | |
- expected_schema.symbols.include? datum | |
- when :array | |
- datum.is_a?(Array) && | |
- datum.all?{|d| validate(expected_schema.items, d) } | |
- when :map | |
- datum.keys.all?{|k| k.is_a? String } && | |
- datum.values.all?{|v| validate(expected_schema.values, v) } | |
- when :union | |
- expected_schema.schemas.any?{|s| validate(s, datum) } | |
- when :record, :error, :request | |
- datum.is_a?(Hash) && | |
- expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) } | |
- else | |
- raise "you suck #{expected_schema.inspect} is not allowed." | |
- end | |
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false }) | |
+ SchemaValidator.validate!(expected_schema, logical_datum, options) | |
+ true | |
+ rescue SchemaValidator::ValidationError | |
+ false | |
end | |
- def initialize(type) | |
+ def initialize(type, logical_type=nil) | |
@type_sym = type.is_a?(Symbol) ? type : type.to_sym | |
+ @logical_type = logical_type | |
end | |
attr_reader :type_sym | |
+ attr_reader :logical_type | |
# Returns the type as a string (rather than a symbol), for backwards compatibility. | |
# Deprecated in favor of {#type_sym}. | |
def type; @type_sym.to_s; end | |
+ def type_adapter | |
+ @type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity | |
+ end | |
+ | |
# Returns the MD5 fingerprint of the schema as an Integer. | |
def md5_fingerprint | |
parsing_form = SchemaNormalization.to_parsing_form(self) | |
@@ -149,6 +131,18 @@ module Avro | |
Digest::SHA256.hexdigest(parsing_form).to_i(16) | |
end | |
+ def read?(writers_schema) | |
+ SchemaCompatibility.can_read?(writers_schema, self) | |
+ end | |
+ | |
+ def be_read?(other_schema) | |
+ other_schema.read?(self) | |
+ end | |
+ | |
+ def mutual_read?(other_schema) | |
+ SchemaCompatibility.mutual_read?(other_schema, self) | |
+ end | |
+ | |
def ==(other, seen=nil) | |
other.is_a?(Schema) && type_sym == other.type_sym | |
end | |
@@ -172,7 +166,9 @@ module Avro | |
end | |
def to_avro(names=nil) | |
- {'type' => type} | |
+ props = {'type' => type} | |
+ props['logicalType'] = logical_type if logical_type | |
+ props | |
end | |
def to_s | |
@@ -181,9 +177,11 @@ module Avro | |
class NamedSchema < Schema | |
attr_reader :name, :namespace | |
- def initialize(type, name, namespace=nil, names=nil) | |
- super(type) | |
+ | |
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil) | |
+ super(type, logical_type) | |
@name, @namespace = Name.extract_namespace(name, namespace) | |
+ @doc = doc | |
names = Name.add_name(names, self) | |
end | |
@@ -194,6 +192,7 @@ module Avro | |
end | |
props = {'name' => @name} | |
props.merge!('namespace' => @namespace) if @namespace | |
+ props.merge!('doc' => @doc) if @doc | |
super.merge props | |
end | |
@@ -203,7 +202,7 @@ module Avro | |
end | |
class RecordSchema < NamedSchema | |
- attr_reader :fields | |
+ attr_reader :fields, :doc | |
def self.make_field_objects(field_data, names, namespace=nil) | |
field_objects, field_names = [], Set.new | |
@@ -211,9 +210,10 @@ module Avro | |
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff | |
type = field['type'] | |
name = field['name'] | |
- default = field['default'] | |
+ default = field.key?('default') ? field['default'] : :no_default | |
order = field['order'] | |
- new_field = Field.new(type, name, default, order, names, namespace) | |
+ doc = field['doc'] | |
+ new_field = Field.new(type, name, default, order, names, namespace, doc) | |
# make sure field name has not been used yet | |
if field_names.include?(new_field.name) | |
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use" | |
@@ -227,14 +227,20 @@ module Avro | |
field_objects | |
end | |
- def initialize(name, namespace, fields, names=nil, schema_type=:record) | |
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil) | |
if schema_type == :request || schema_type == 'request' | |
@type_sym = schema_type.to_sym | |
@namespace = namespace | |
+ @name = nil | |
+ @doc = nil | |
else | |
- super(schema_type, name, namespace, names) | |
+ super(schema_type, name, namespace, names, doc) | |
end | |
- @fields = RecordSchema.make_field_objects(fields, names, self.namespace) | |
+ @fields = if fields | |
+ RecordSchema.make_field_objects(fields, names, self.namespace) | |
+ else | |
+ {} | |
+ end | |
end | |
def fields_hash | |
@@ -285,8 +291,7 @@ module Avro | |
def initialize(schemas, names=nil, default_namespace=nil) | |
super(:union) | |
- schema_objects = [] | |
- schemas.each_with_index do |schema, i| | |
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects| | |
new_schema = subparse(schema, names, default_namespace) | |
ns_type = new_schema.type_sym | |
@@ -299,7 +304,6 @@ module Avro | |
else | |
schema_objects << new_schema | |
end | |
- @schemas = schema_objects | |
end | |
end | |
@@ -309,13 +313,14 @@ module Avro | |
end | |
class EnumSchema < NamedSchema | |
- attr_reader :symbols | |
- def initialize(name, space, symbols, names=nil) | |
+ attr_reader :symbols, :doc | |
+ | |
+ def initialize(name, space, symbols, names=nil, doc=nil) | |
if symbols.uniq.length < symbols.length | |
- fail_msg = 'Duplicate symbol: %s' % symbols | |
+ fail_msg = "Duplicate symbol: #{symbols}" | |
raise Avro::SchemaParseError, fail_msg | |
end | |
- super(:enum, name, space, names) | |
+ super(:enum, name, space, names, doc) | |
@symbols = symbols | |
end | |
@@ -327,11 +332,11 @@ module Avro | |
# Valid primitive types are in PRIMITIVE_TYPES. | |
class PrimitiveSchema < Schema | |
- def initialize(type) | |
+ def initialize(type, logical_type=nil) | |
if PRIMITIVE_TYPES_SYM.include?(type) | |
- super(type) | |
+ super(type, logical_type) | |
elsif PRIMITIVE_TYPES.include?(type) | |
- super(type.to_sym) | |
+ super(type.to_sym, logical_type) | |
else | |
raise AvroError.new("#{type} is not a valid primitive type.") | |
end | |
@@ -345,12 +350,12 @@ module Avro | |
class FixedSchema < NamedSchema | |
attr_reader :size | |
- def initialize(name, space, size, names=nil) | |
+ def initialize(name, space, size, names=nil, logical_type=nil) | |
# Ensure valid cto args | |
- unless size.is_a?(Fixnum) || size.is_a?(Bignum) | |
+ unless size.is_a?(Integer) | |
raise AvroError, 'Fixed Schema requires a valid integer for size property.' | |
end | |
- super(:fixed, name, space, names) | |
+ super(:fixed, name, space, names, nil, logical_type) | |
@size = size | |
end | |
@@ -361,21 +366,42 @@ module Avro | |
end | |
class Field < Schema | |
- attr_reader :type, :name, :default, :order | |
+ attr_reader :type, :name, :default, :order, :doc | |
- def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil) | |
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil) | |
@type = subparse(type, names, namespace) | |
@name = name | |
@default = default | |
@order = order | |
+ @doc = doc | |
+ validate_default! if default? && !Avro.disable_field_default_validation | |
+ end | |
+ | |
+ def default? | |
+ @default != :no_default | |
end | |
def to_avro(names=Set.new) | |
{'name' => name, 'type' => type.to_avro(names)}.tap do |avro| | |
- avro['default'] = default if default | |
+ avro['default'] = default if default? | |
avro['order'] = order if order | |
+ avro['doc'] = doc if doc | |
end | |
end | |
+ | |
+ private | |
+ | |
+ def validate_default! | |
+ type_for_default = if type.type_sym == :union | |
+ type.schemas.first | |
+ else | |
+ type | |
+ end | |
+ | |
+ Avro::SchemaValidator.validate!(type_for_default, default) | |
+ rescue Avro::SchemaValidator::ValidationError => e | |
+ raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}" | |
+ end | |
end | |
end | |
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb b/lang/ruby/lib/avro/schema_compatibility.rb | |
new file mode 100644 | |
index 00000000..37401fc0 | |
--- /dev/null | |
+++ b/lang/ruby/lib/avro/schema_compatibility.rb | |
@@ -0,0 +1,170 @@ | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+module Avro | |
+ module SchemaCompatibility | |
+ # Perform a full, recursive check that a datum written using the writers_schema | |
+ # can be read using the readers_schema. | |
+ def self.can_read?(writers_schema, readers_schema) | |
+ Checker.new.can_read?(writers_schema, readers_schema) | |
+ end | |
+ | |
+ # Perform a full, recursive check that a datum written using either the | |
+ # writers_schema or the readers_schema can be read using the other schema. | |
+ def self.mutual_read?(writers_schema, readers_schema) | |
+ Checker.new.mutual_read?(writers_schema, readers_schema) | |
+ end | |
+ | |
+ # Perform a basic check that a datum written with the writers_schema could | |
+ # be read using the readers_schema. This check only includes matching the types, | |
+ # including schema promotion, and matching the full name for named types. | |
+ # Aliases for named types are not supported here, and the ruby implementation | |
+ # of Avro in general does not include support for aliases. | |
+ def self.match_schemas(writers_schema, readers_schema) | |
+ w_type = writers_schema.type_sym | |
+ r_type = readers_schema.type_sym | |
+ | |
+ # This conditional is begging for some OO love. | |
+ if w_type == :union || r_type == :union | |
+ return true | |
+ end | |
+ | |
+ if w_type == r_type | |
+ return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type) | |
+ | |
+ case r_type | |
+ when :record | |
+ return writers_schema.fullname == readers_schema.fullname | |
+ when :error | |
+ return writers_schema.fullname == readers_schema.fullname | |
+ when :request | |
+ return true | |
+ when :fixed | |
+ return writers_schema.fullname == readers_schema.fullname && | |
+ writers_schema.size == readers_schema.size | |
+ when :enum | |
+ return writers_schema.fullname == readers_schema.fullname | |
+ when :map | |
+ return match_schemas(writers_schema.values, readers_schema.values) | |
+ when :array | |
+ return match_schemas(writers_schema.items, readers_schema.items) | |
+ end | |
+ end | |
+ | |
+ # Handle schema promotion | |
+ if w_type == :int && [:long, :float, :double].include?(r_type) | |
+ return true | |
+ elsif w_type == :long && [:float, :double].include?(r_type) | |
+ return true | |
+ elsif w_type == :float && r_type == :double | |
+ return true | |
+ elsif w_type == :string && r_type == :bytes | |
+ return true | |
+ elsif w_type == :bytes && r_type == :string | |
+ return true | |
+ end | |
+ | |
+ return false | |
+ end | |
+ | |
+ class Checker | |
+ SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze | |
+ | |
+ attr_reader :recursion_set | |
+ private :recursion_set | |
+ | |
+ def initialize | |
+ @recursion_set = Set.new | |
+ end | |
+ | |
+ def can_read?(writers_schema, readers_schema) | |
+ full_match_schemas(writers_schema, readers_schema) | |
+ end | |
+ | |
+ def mutual_read?(writers_schema, readers_schema) | |
+ can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema) | |
+ end | |
+ | |
+ private | |
+ | |
+ def full_match_schemas(writers_schema, readers_schema) | |
+ return true if recursion_in_progress?(writers_schema, readers_schema) | |
+ | |
+ return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema) | |
+ | |
+ if writers_schema.type_sym != :union && SIMPLE_CHECKS.include?(readers_schema.type_sym) | |
+ return true | |
+ end | |
+ | |
+ case readers_schema.type_sym | |
+ when :record | |
+ match_record_schemas(writers_schema, readers_schema) | |
+ when :map | |
+ full_match_schemas(writers_schema.values, readers_schema.values) | |
+ when :array | |
+ full_match_schemas(writers_schema.items, readers_schema.items) | |
+ when :union | |
+ match_union_schemas(writers_schema, readers_schema) | |
+ when :enum | |
+ # reader's symbols must contain all writer's symbols | |
+ (writers_schema.symbols - readers_schema.symbols).empty? | |
+ else | |
+ if writers_schema.type_sym == :union && writers_schema.schemas.size == 1 | |
+ full_match_schemas(writers_schema.schemas.first, readers_schema) | |
+ else | |
+ false | |
+ end | |
+ end | |
+ end | |
+ | |
+ def match_union_schemas(writers_schema, readers_schema) | |
+ raise 'readers_schema must be a union' unless readers_schema.type_sym == :union | |
+ | |
+ case writers_schema.type_sym | |
+ when :union | |
+ writers_schema.schemas.all? { |writer_type| full_match_schemas(writer_type, readers_schema) } | |
+ else | |
+ readers_schema.schemas.any? { |reader_type| full_match_schemas(writers_schema, reader_type) } | |
+ end | |
+ end | |
+ | |
+ def match_record_schemas(writers_schema, readers_schema) | |
+ return false if writers_schema.type_sym == :union | |
+ | |
+ writer_fields_hash = writers_schema.fields_hash | |
+ readers_schema.fields.each do |field| | |
+ if writer_fields_hash.key?(field.name) | |
+ return false unless full_match_schemas(writer_fields_hash[field.name].type, field.type) | |
+ else | |
+ return false unless field.default? | |
+ end | |
+ end | |
+ | |
+ return true | |
+ end | |
+ | |
+ def recursion_in_progress?(writers_schema, readers_schema) | |
+ key = [writers_schema.object_id, readers_schema.object_id] | |
+ | |
+ if recursion_set.include?(key) | |
+ true | |
+ else | |
+ recursion_set.add(key) | |
+ false | |
+ end | |
+ end | |
+ end | |
+ end | |
+end | |
diff --git a/lang/ruby/lib/avro/schema_normalization.rb b/lang/ruby/lib/avro/schema_normalization.rb | |
index 0a5bee5d..1506fcac 100644 | |
--- a/lang/ruby/lib/avro/schema_normalization.rb | |
+++ b/lang/ruby/lib/avro/schema_normalization.rb | |
@@ -25,7 +25,7 @@ module Avro | |
end | |
def to_parsing_form(schema) | |
- JSON.dump(normalize_schema(schema)) | |
+ MultiJson.dump(normalize_schema(schema)) | |
end | |
private | |
diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb | |
new file mode 100644 | |
index 00000000..f9274f06 | |
--- /dev/null | |
+++ b/lang/ruby/lib/avro/schema_validator.rb | |
@@ -0,0 +1,242 @@ | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+ | |
+module Avro | |
+ class SchemaValidator | |
+ ROOT_IDENTIFIER = '.'.freeze | |
+ PATH_SEPARATOR = '.'.freeze | |
+ INT_RANGE = Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE | |
+ LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE | |
+ COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze | |
+ BOOLEAN_VALUES = [true, false].freeze | |
+ | |
+ class Result | |
+ attr_reader :errors | |
+ | |
+ def initialize | |
+ @errors = [] | |
+ end | |
+ | |
+ def <<(error) | |
+ @errors << error | |
+ end | |
+ | |
+ def add_error(path, message) | |
+ self << "at #{path} #{message}" | |
+ end | |
+ | |
+ def failure? | |
+ @errors.any? | |
+ end | |
+ | |
+ def to_s | |
+ errors.join("\n") | |
+ end | |
+ end | |
+ | |
+ class ValidationError < StandardError | |
+ attr_reader :result | |
+ | |
+ def initialize(result = Result.new) | |
+ @result = result | |
+ super | |
+ end | |
+ | |
+ def to_s | |
+ result.to_s | |
+ end | |
+ end | |
+ | |
+ TypeMismatchError = Class.new(ValidationError) | |
+ | |
+ class << self | |
+ def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false }) | |
+ options ||= {} | |
+ options[:recursive] = true unless options.key?(:recursive) | |
+ | |
+ result = Result.new | |
+ if options[:recursive] | |
+ validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options) | |
+ else | |
+ validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options) | |
+ end | |
+ fail ValidationError, result if result.failure? | |
+ result | |
+ end | |
+ | |
+ private | |
+ | |
+ def validate_recursive(expected_schema, logical_datum, path, result, options = {}) | |
+ datum = resolve_datum(expected_schema, logical_datum, options[:encoded]) | |
+ | |
+ validate_simple(expected_schema, datum, path, result, encoded: true) | |
+ | |
+ case expected_schema.type_sym | |
+ when :array | |
+ validate_array(expected_schema, datum, path, result, options) | |
+ when :map | |
+ validate_map(expected_schema, datum, path, result, options) | |
+ when :union | |
+ validate_union(expected_schema, datum, path, result, options) | |
+ when :record, :error, :request | |
+ fail TypeMismatchError unless datum.is_a?(Hash) | |
+ expected_schema.fields.each do |field| | |
+ deeper_path = deeper_path_for_hash(field.name, path) | |
+ validate_recursive(field.type, datum[field.name], deeper_path, result, options) | |
+ end | |
+ if options[:fail_on_extra_fields] | |
+ datum_fields = datum.keys.map(&:to_s) | |
+ schema_fields = expected_schema.fields.map(&:name) | |
+ (datum_fields - schema_fields).each do |extra_field| | |
+ result.add_error(path, "extra field '#{extra_field}' - not in schema") | |
+ end | |
+ end | |
+ end | |
+ rescue TypeMismatchError | |
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}") | |
+ end | |
+ | |
+ def validate_simple(expected_schema, logical_datum, path, result, options = {}) | |
+ datum = resolve_datum(expected_schema, logical_datum, options[:encoded]) | |
+ validate_type(expected_schema) | |
+ | |
+ case expected_schema.type_sym | |
+ when :null | |
+ fail TypeMismatchError unless datum.nil? | |
+ when :boolean | |
+ fail TypeMismatchError unless BOOLEAN_VALUES.include?(datum) | |
+ when :string, :bytes | |
+ fail TypeMismatchError unless datum.is_a?(String) | |
+ when :int | |
+ fail TypeMismatchError unless datum.is_a?(Integer) | |
+ result.add_error(path, "out of bound value #{datum}") unless INT_RANGE.cover?(datum) | |
+ when :long | |
+ fail TypeMismatchError unless datum.is_a?(Integer) | |
+ result.add_error(path, "out of bound value #{datum}") unless LONG_RANGE.cover?(datum) | |
+ when :float, :double | |
+ fail TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer) | |
+ when :fixed | |
+ if datum.is_a? String | |
+ result.add_error(path, fixed_string_message(expected_schema.size, datum)) unless datum.bytesize == expected_schema.size | |
+ else | |
+ result.add_error(path, "expected fixed with size #{expected_schema.size}, got #{actual_value_message(datum)}") | |
+ end | |
+ when :enum | |
+ result.add_error(path, enum_message(expected_schema.symbols, datum)) unless expected_schema.symbols.include?(datum) | |
+ end | |
+ rescue TypeMismatchError | |
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}") | |
+ end | |
+ | |
+ def resolve_datum(expected_schema, logical_datum, encoded) | |
+ if encoded | |
+ logical_datum | |
+ else | |
+ expected_schema.type_adapter.encode(logical_datum) rescue nil | |
+ end | |
+ end | |
+ | |
+ def validate_type(expected_schema) | |
+ unless Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym) | |
+ fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}" | |
+ end | |
+ end | |
+ | |
+ def fixed_string_message(size, datum) | |
+ "expected fixed with size #{size}, got \"#{datum}\" with size #{datum.bytesize}" | |
+ end | |
+ | |
+ def enum_message(symbols, datum) | |
+ "expected enum with values #{symbols}, got #{actual_value_message(datum)}" | |
+ end | |
+ | |
+ def validate_array(expected_schema, datum, path, result, options = {}) | |
+ fail TypeMismatchError unless datum.is_a?(Array) | |
+ datum.each_with_index do |d, i| | |
+ validate_recursive(expected_schema.items, d, path + "[#{i}]", result, options) | |
+ end | |
+ end | |
+ | |
+ def validate_map(expected_schema, datum, path, result, options = {}) | |
+ fail TypeMismatchError unless datum.is_a?(Hash) | |
+ datum.keys.each do |k| | |
+ result.add_error(path, "unexpected key type '#{ruby_to_avro_type(k.class)}' in map") unless k.is_a?(String) | |
+ end | |
+ datum.each do |k, v| | |
+ deeper_path = deeper_path_for_hash(k, path) | |
+ validate_recursive(expected_schema.values, v, deeper_path, result, options) | |
+ end | |
+ end | |
+ | |
+ def validate_union(expected_schema, datum, path, result, options = {}) | |
+ if expected_schema.schemas.size == 1 | |
+ validate_recursive(expected_schema.schemas.first, datum, path, result, options) | |
+ return | |
+ end | |
+ failures = [] | |
+ compatible_type = first_compatible_type(datum, expected_schema, path, failures, options) | |
+ return unless compatible_type.nil? | |
+ | |
+ complex_type_failed = failures.detect { |r| COMPLEX_TYPES.include?(r[:type]) } | |
+ if complex_type_failed | |
+ complex_type_failed[:result].errors.each { |error| result << error } | |
+ else | |
+ types = expected_schema.schemas.map { |s| "'#{s.type_sym}'" }.join(', ') | |
+ result.add_error(path, "expected union of [#{types}], got #{actual_value_message(datum)}") | |
+ end | |
+ end | |
+ | |
+ def first_compatible_type(datum, expected_schema, path, failures, options = {}) | |
+ expected_schema.schemas.find do |schema| | |
+ result = Result.new | |
+ validate_recursive(schema, datum, path, result, options) | |
+ failures << { type: schema.type_sym, result: result } if result.failure? | |
+ !result.failure? | |
+ end | |
+ end | |
+ | |
+ def deeper_path_for_hash(sub_key, path) | |
+ "#{path}#{PATH_SEPARATOR}#{sub_key}".squeeze(PATH_SEPARATOR) | |
+ end | |
+ | |
+ def actual_value_message(value) | |
+ avro_type = if value.is_a?(Integer) | |
+ ruby_integer_to_avro_type(value) | |
+ else | |
+ ruby_to_avro_type(value.class) | |
+ end | |
+ if value.nil? | |
+ avro_type | |
+ else | |
+ "#{avro_type} with value #{value.inspect}" | |
+ end | |
+ end | |
+ | |
+ def ruby_to_avro_type(ruby_class) | |
+ { | |
+ NilClass => 'null', | |
+ String => 'string', | |
+ Float => 'float', | |
+ Hash => 'record' | |
+ }.fetch(ruby_class, ruby_class) | |
+ end | |
+ | |
+ def ruby_integer_to_avro_type(value) | |
+ INT_RANGE.cover?(value) ? 'int' : 'long' | |
+ end | |
+ end | |
+ end | |
+end | |
diff --git a/lang/ruby/test/case_finder.rb b/lang/ruby/test/case_finder.rb | |
index de2d9c49..a666f338 100644 | |
--- a/lang/ruby/test/case_finder.rb | |
+++ b/lang/ruby/test/case_finder.rb | |
@@ -16,6 +16,8 @@ | |
# specific language governing permissions and limitations | |
# under the License. | |
# | |
+require 'strscan' | |
+ | |
class CaseFinder | |
PATH = File.expand_path("../../../../share/test/data/schema-tests.txt", __FILE__) | |
diff --git a/lang/ruby/test/random_data.rb b/lang/ruby/test/random_data.rb | |
index 9d276f7d..54fa8781 100644 | |
--- a/lang/ruby/test/random_data.rb | |
+++ b/lang/ruby/test/random_data.rb | |
@@ -27,15 +27,17 @@ class RandomData | |
end | |
def nextdata(schm, d=0) | |
+ return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity) | |
+ | |
case schm.type_sym | |
when :boolean | |
rand > 0.5 | |
when :string | |
randstr() | |
when :int | |
- rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE | |
+ rand_int | |
when :long | |
- rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE | |
+ rand_long | |
when :float | |
(-1024 + 2048 * rand).round.to_f | |
when :double | |
@@ -79,6 +81,15 @@ class RandomData | |
end | |
end | |
+ def logical_nextdata(schm, _d=0) | |
+ case schm.logical_type | |
+ when 'date' | |
+ Avro::LogicalTypes::IntDate.decode(rand_int) | |
+ when 'timestamp-millis', 'timestamp-micros' | |
+ Avro::LogicalTypes::TimestampMicros.decode(rand_long) | |
+ end | |
+ end | |
+ | |
CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789' | |
BYTEPOOL = '12345abcd' | |
@@ -87,4 +98,12 @@ class RandomData | |
rand(length+1).times { str << chars[rand(chars.size)] } | |
str | |
end | |
+ | |
+ def rand_int | |
+ rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE | |
+ end | |
+ | |
+ def rand_long | |
+ rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE | |
+ end | |
end | |
diff --git a/lang/ruby/test/test_datafile.rb b/lang/ruby/test/test_datafile.rb | |
index 8fe05d70..0f13df35 100644 | |
--- a/lang/ruby/test/test_datafile.rb | |
+++ b/lang/ruby/test/test_datafile.rb | |
@@ -20,13 +20,13 @@ require 'test_help' | |
class TestDataFile < Test::Unit::TestCase | |
HERE = File.expand_path File.dirname(__FILE__) | |
def setup | |
- if File.exists?(HERE + '/data.avr') | |
+ if File.exist?(HERE + '/data.avr') | |
File.unlink(HERE + '/data.avr') | |
end | |
end | |
def teardown | |
- if File.exists?(HERE + '/data.avr') | |
+ if File.exist?(HERE + '/data.avr') | |
File.unlink(HERE + '/data.avr') | |
end | |
end | |
@@ -38,7 +38,7 @@ class TestDataFile < Test::Unit::TestCase | |
"fields" : [ | |
{"name": "username", "type": "string"}, | |
{"name": "age", "type": "int"}, | |
- {"name": "verified", "type": "boolean", "default": "false"} | |
+ {"name": "verified", "type": "boolean", "default": false} | |
]} | |
JSON | |
diff --git a/lang/ruby/test/test_io.rb b/lang/ruby/test/test_io.rb | |
index f8f3da13..350d48bc 100644 | |
--- a/lang/ruby/test/test_io.rb | |
+++ b/lang/ruby/test/test_io.rb | |
@@ -84,6 +84,17 @@ EOS | |
check_default(record_schema, '{"f": 11}', {"f" => 11}) | |
end | |
+ def test_record_with_logical_type | |
+ record_schema = <<EOS | |
+ {"type": "record", | |
+ "name": "Test", | |
+ "fields": [{"name": "ts", | |
+ "type": {"type": "long", | |
+ "logicalType": "timestamp-micros"}}]} | |
+EOS | |
+ check(record_schema) | |
+ end | |
+ | |
def test_error | |
error_schema = <<EOS | |
{"type": "error", | |
@@ -115,6 +126,7 @@ EOS | |
def test_union | |
union_schema = <<EOS | |
["string", | |
+ {"type": "int", "logicalType": "date"}, | |
"null", | |
"long", | |
{"type": "record", | |
@@ -146,10 +158,31 @@ EOS | |
check_default(fixed_schema, '"a"', "a") | |
end | |
+ def test_record_with_nil | |
+ schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}') | |
+ assert_raise(Avro::IO::AvroTypeError) do | |
+ write_datum(nil, schema) | |
+ end | |
+ end | |
+ | |
+ def test_array_with_nil | |
+ schema = Avro::Schema.parse('{"type":"array", "items":"int"}') | |
+ assert_raise(Avro::IO::AvroTypeError) do | |
+ write_datum(nil, schema) | |
+ end | |
+ end | |
+ | |
+ def test_map_with_nil | |
+ schema = Avro::Schema.parse('{"type":"map", "values":"long"}') | |
+ assert_raise(Avro::IO::AvroTypeError) do | |
+ write_datum(nil, schema) | |
+ end | |
+ end | |
+ | |
def test_enum_with_duplicate | |
str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}' | |
- assert_raises(Avro::SchemaParseError) do | |
- schema = Avro::Schema.parse str | |
+ assert_raises(Avro::SchemaParseError.new('Duplicate symbol: ["AA", "AA"]')) do | |
+ Avro::Schema.parse str | |
end | |
end | |
@@ -210,8 +243,53 @@ EOS | |
end | |
end | |
+ def test_utf8_string_encoding | |
+ [ | |
+ "\xC3".force_encoding('ISO-8859-1'), | |
+ "\xC3\x83".force_encoding('UTF-8') | |
+ ].each do |value| | |
+ output = ''.force_encoding('BINARY') | |
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output)) | |
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"')) | |
+ datum_writer.write(value, encoder) | |
+ | |
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output | |
+ end | |
+ end | |
+ | |
+ def test_bytes_encoding | |
+ [ | |
+ "\xC3\x83".force_encoding('BINARY'), | |
+ "\xC3\x83".force_encoding('ISO-8859-1'), | |
+ "\xC3\x83".force_encoding('UTF-8') | |
+ ].each do |value| | |
+ output = ''.force_encoding('BINARY') | |
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output)) | |
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"')) | |
+ datum_writer.write(value, encoder) | |
+ | |
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output | |
+ end | |
+ end | |
+ | |
+ def test_fixed_encoding | |
+ [ | |
+ "\xC3\x83".force_encoding('BINARY'), | |
+ "\xC3\x83".force_encoding('ISO-8859-1'), | |
+ "\xC3\x83".force_encoding('UTF-8') | |
+ ].each do |value| | |
+ output = ''.force_encoding('BINARY') | |
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output)) | |
+ schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}' | |
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema)) | |
+ datum_writer.write(value, encoder) | |
+ | |
+ assert_equal "\xc3\x83".force_encoding('BINARY'), output | |
+ end | |
+ end | |
+ | |
def test_skip_long | |
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS | |
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS | |
value_to_read = 6253 | |
# write some data in binary to string buffer | |
@@ -236,7 +314,7 @@ EOS | |
end | |
def test_skip_int | |
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS | |
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS | |
value_to_read = 6253 | |
writer = StringIO.new | |
@@ -286,7 +364,7 @@ EOS | |
datum_to_write = 219 | |
for rs in promotable_schemas[(i + 1)..-1] | |
readers_schema = Avro::Schema.parse(rs) | |
- writer, enc, dw = write_datum(datum_to_write, writers_schema) | |
+ writer, _enc, _dw = write_datum(datum_to_write, writers_schema) | |
datum_read = read_datum(writer, writers_schema, readers_schema) | |
if datum_read != datum_to_write | |
incorrect += 1 | |
@@ -295,8 +373,71 @@ EOS | |
assert_equal(incorrect, 0) | |
end | |
end | |
+ | |
+ def test_interchangeable_schemas | |
+ interchangeable_schemas = ['"string"', '"bytes"'] | |
+ incorrect = 0 | |
+ interchangeable_schemas.each_with_index do |ws, i| | |
+ writers_schema = Avro::Schema.parse(ws) | |
+ datum_to_write = 'foo' | |
+ readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0]) | |
+ writer, * = write_datum(datum_to_write, writers_schema) | |
+ datum_read = read_datum(writer, writers_schema, readers_schema) | |
+ if datum_read != datum_to_write | |
+ incorrect += 1 | |
+ end | |
+ end | |
+ assert_equal(incorrect, 0) | |
+ end | |
+ | |
+ def test_array_schema_promotion | |
+ writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}') | |
+ readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}') | |
+ datum_to_write = [1, 2] | |
+ writer, * = write_datum(datum_to_write, writers_schema) | |
+ datum_read = read_datum(writer, writers_schema, readers_schema) | |
+ assert_equal(datum_read, datum_to_write) | |
+ end | |
+ | |
+ def test_map_schema_promotion | |
+ writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}') | |
+ readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}') | |
+ datum_to_write = { 'foo' => 1, 'bar' => 2 } | |
+ writer, * = write_datum(datum_to_write, writers_schema) | |
+ datum_read = read_datum(writer, writers_schema, readers_schema) | |
+ assert_equal(datum_read, datum_to_write) | |
+ end | |
+ | |
+ def test_snappy_backward_compat | |
+ # a snappy-compressed block payload without the checksum | |
+ # this has no back-references, just one literal so the last 9 | |
+ # bytes are the uncompressed payload. | |
+ old_snappy_bytes = "\x09\x20\x02\x06\x02\x0a\x67\x72\x65\x65\x6e" | |
+ uncompressed_bytes = "\x02\x06\x02\x0a\x67\x72\x65\x65\x6e" | |
+ snappy = Avro::DataFile::SnappyCodec.new | |
+ assert_equal(uncompressed_bytes, snappy.decompress(old_snappy_bytes)) | |
+ end | |
+ | |
private | |
+ def check_no_default(schema_json) | |
+ actual_schema = '{"type": "record", "name": "Foo", "fields": []}' | |
+ actual = Avro::Schema.parse(actual_schema) | |
+ | |
+ expected_schema = <<EOS | |
+ {"type": "record", | |
+ "name": "Foo", | |
+ "fields": [{"name": "f", "type": #{schema_json}}]} | |
+EOS | |
+ expected = Avro::Schema.parse(expected_schema) | |
+ | |
+ reader = Avro::IO::DatumReader.new(actual, expected) | |
+ assert_raise Avro::AvroError do | |
+ value = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new)) | |
+ assert_not_equal(value, :no_default) # should never return this | |
+ end | |
+ end | |
+ | |
def check_default(schema_json, default_json, default_value) | |
actual_schema = '{"type": "record", "name": "Foo", "fields": []}' | |
actual = Avro::Schema.parse(actual_schema) | |
@@ -336,11 +477,14 @@ EOS | |
# test writing of data to file | |
check_datafile(schema) | |
+ | |
+ # check that AvroError is raised when there is no default | |
+ check_no_default(str) | |
end | |
def checkser(schm, randomdata) | |
datum = randomdata.next | |
- assert validate(schm, datum) | |
+ assert validate(schm, datum), 'datum is not valid for schema' | |
w = Avro::IO::DatumWriter.new(schm) | |
writer = StringIO.new "", "w" | |
w.write(datum, Avro::IO::BinaryEncoder.new(writer)) | |
diff --git a/lang/ruby/test/test_logical_types.rb b/lang/ruby/test/test_logical_types.rb | |
new file mode 100644 | |
index 00000000..8cd6819b | |
--- /dev/null | |
+++ b/lang/ruby/test/test_logical_types.rb | |
@@ -0,0 +1,128 @@ | |
+# -*- coding: utf-8 -*- | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+ | |
+require 'test_help' | |
+ | |
+class TestLogicalTypes < Test::Unit::TestCase | |
+ def test_int_date | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ { "type": "int", "logicalType": "date" } | |
+ SCHEMA | |
+ | |
+ assert_equal 'date', schema.logical_type | |
+ today = Date.today | |
+ assert_encode_and_decode today, schema | |
+ assert_preencoded Avro::LogicalTypes::IntDate.encode(today), schema, today | |
+ end | |
+ | |
+ def test_int_date_conversion | |
+ type = Avro::LogicalTypes::IntDate | |
+ | |
+ assert_equal 5, type.encode(Date.new(1970, 1, 6)) | |
+ assert_equal 0, type.encode(Date.new(1970, 1, 1)) | |
+ assert_equal(-5, type.encode(Date.new(1969, 12, 27))) | |
+ | |
+ assert_equal Date.new(1970, 1, 6), type.decode(5) | |
+ assert_equal Date.new(1970, 1, 1), type.decode(0) | |
+ assert_equal Date.new(1969, 12, 27), type.decode(-5) | |
+ end | |
+ | |
+ def test_timestamp_millis_long | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ { "type": "long", "logicalType": "timestamp-millis" } | |
+ SCHEMA | |
+ | |
+ # The Time.at format is (seconds, microseconds) since Epoch. | |
+ time = Time.at(628232400, 12000) | |
+ | |
+ assert_equal 'timestamp-millis', schema.logical_type | |
+ assert_encode_and_decode time, schema | |
+ assert_preencoded Avro::LogicalTypes::TimestampMillis.encode(time), schema, time.utc | |
+ end | |
+ | |
+ def test_timestamp_millis_long_conversion | |
+ type = Avro::LogicalTypes::TimestampMillis | |
+ | |
+ now = Time.now.utc | |
+ now_millis = Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec / 1000 * 1000) | |
+ | |
+ assert_equal now_millis, type.decode(type.encode(now_millis)) | |
+ assert_equal 1432849613221, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221000)) | |
+ assert_equal 1432849613221, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221)) | |
+ assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221000), type.decode(1432849613221) | |
+ end | |
+ | |
+ def test_timestamp_micros_long | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ { "type": "long", "logicalType": "timestamp-micros" } | |
+ SCHEMA | |
+ | |
+ # The Time.at format is (seconds, microseconds) since Epoch. | |
+ time = Time.at(628232400, 12345) | |
+ | |
+ assert_equal 'timestamp-micros', schema.logical_type | |
+ assert_encode_and_decode time, schema | |
+ assert_preencoded Avro::LogicalTypes::TimestampMicros.encode(time), schema, time.utc | |
+ end | |
+ | |
+ def test_timestamp_micros_long_conversion | |
+ type = Avro::LogicalTypes::TimestampMicros | |
+ | |
+ now = Time.now.utc | |
+ | |
+ assert_equal Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec), type.decode(type.encode(now)) | |
+ assert_equal 1432849613221843, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221843)) | |
+ assert_equal 1432849613221843, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221843)) | |
+ assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221843), type.decode(1432849613221843) | |
+ end | |
+ | |
+ def test_parse_fixed_duration | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ { "type": "fixed", "size": 12, "name": "fixed_dur", "logicalType": "duration" } | |
+ SCHEMA | |
+ | |
+ assert_equal 'duration', schema.logical_type | |
+ end | |
+ | |
+ def encode(datum, schema) | |
+ buffer = StringIO.new("") | |
+ encoder = Avro::IO::BinaryEncoder.new(buffer) | |
+ | |
+ datum_writer = Avro::IO::DatumWriter.new(schema) | |
+ datum_writer.write(datum, encoder) | |
+ | |
+ buffer.string | |
+ end | |
+ | |
+ def decode(encoded, schema) | |
+ buffer = StringIO.new(encoded) | |
+ decoder = Avro::IO::BinaryDecoder.new(buffer) | |
+ | |
+ datum_reader = Avro::IO::DatumReader.new(schema, schema) | |
+ datum_reader.read(decoder) | |
+ end | |
+ | |
+ def assert_encode_and_decode(datum, schema) | |
+ encoded = encode(datum, schema) | |
+ assert_equal datum, decode(encoded, schema) | |
+ end | |
+ | |
+ def assert_preencoded(datum, schema, decoded) | |
+ encoded = encode(datum, schema) | |
+ assert_equal decoded, decode(encoded, schema) | |
+ end | |
+end | |
diff --git a/lang/ruby/test/test_protocol.rb b/lang/ruby/test/test_protocol.rb | |
index fda38824..1b036fe5 100644 | |
--- a/lang/ruby/test/test_protocol.rb | |
+++ b/lang/ruby/test/test_protocol.rb | |
@@ -5,9 +5,9 @@ | |
# to you under the Apache License, Version 2.0 (the | |
# "License"); you may not use this file except in compliance | |
# with the License. You may obtain a copy of the License at | |
-# | |
+# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
-# | |
+# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
@@ -139,7 +139,7 @@ EOS | |
} | |
EOS | |
- ExampleProtocol.new(<<-EOS, true) | |
+ ExampleProtocol.new(<<-EOS, true), | |
{"namespace": "org.apache.avro.test", | |
"protocol": "BulkData", | |
@@ -160,6 +160,29 @@ EOS | |
} | |
} | |
+EOS | |
+ ExampleProtocol.new(<<-EOS, true), | |
+{ | |
+ "namespace": "com.acme", | |
+ "protocol": "HelloWorld", | |
+ "doc": "protocol_documentation", | |
+ | |
+ "types": [ | |
+ {"name": "Greeting", "type": "record", "fields": [ | |
+ {"name": "message", "type": "string"}]}, | |
+ {"name": "Curse", "type": "error", "fields": [ | |
+ {"name": "message", "type": "string"}]} | |
+ ], | |
+ | |
+ "messages": { | |
+ "hello": { | |
+ "doc": "message_documentation", | |
+ "request": [{"name": "greeting", "type": "Greeting" }], | |
+ "response": "Greeting", | |
+ "errors": ["Curse"] | |
+ } | |
+ } | |
+} | |
EOS | |
] | |
@@ -196,4 +219,14 @@ EOS | |
assert_equal type.namespace, 'com.acme' | |
end | |
end | |
+ | |
+ def test_protocol_doc_attribute | |
+ original = Protocol.parse(EXAMPLES.last.protocol_string) | |
+ assert_equal 'protocol_documentation', original.doc | |
+ end | |
+ | |
+ def test_protocol_message_doc_attribute | |
+ original = Protocol.parse(EXAMPLES.last.protocol_string) | |
+ assert_equal 'message_documentation', original.messages['hello'].doc | |
+ end | |
end | |
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb | |
index 0668cf2f..ed1c8eef 100644 | |
--- a/lang/ruby/test/test_schema.rb | |
+++ b/lang/ruby/test/test_schema.rb | |
@@ -17,6 +17,10 @@ | |
require 'test_help' | |
class TestSchema < Test::Unit::TestCase | |
+ def hash_to_schema(hash) | |
+ Avro::Schema.parse(hash.to_json) | |
+ end | |
+ | |
def test_default_namespace | |
schema = Avro::Schema.parse <<-SCHEMA | |
{"type": "record", "name": "OuterRecord", "fields": [ | |
@@ -27,13 +31,13 @@ class TestSchema < Test::Unit::TestCase | |
]} | |
SCHEMA | |
- assert_equal schema.name, 'OuterRecord' | |
- assert_equal schema.fullname, 'OuterRecord' | |
+ assert_equal 'OuterRecord', schema.name | |
+ assert_equal 'OuterRecord', schema.fullname | |
assert_nil schema.namespace | |
schema.fields.each do |field| | |
- assert_equal field.type.name, 'InnerRecord' | |
- assert_equal field.type.fullname, 'InnerRecord' | |
+ assert_equal 'InnerRecord', field.type.name | |
+ assert_equal 'InnerRecord', field.type.fullname | |
assert_nil field.type.namespace | |
end | |
end | |
@@ -50,13 +54,13 @@ class TestSchema < Test::Unit::TestCase | |
]} | |
SCHEMA | |
- assert_equal schema.name, 'OuterRecord' | |
- assert_equal schema.fullname, 'my.name.space.OuterRecord' | |
- assert_equal schema.namespace, 'my.name.space' | |
+ assert_equal 'OuterRecord', schema.name | |
+ assert_equal 'my.name.space.OuterRecord', schema.fullname | |
+ assert_equal 'my.name.space', schema.namespace | |
schema.fields.each do |field| | |
- assert_equal field.type.name, 'InnerRecord' | |
- assert_equal field.type.fullname, 'my.name.space.InnerRecord' | |
- assert_equal field.type.namespace, 'my.name.space' | |
+ assert_equal 'InnerRecord', field.type.name | |
+ assert_equal 'my.name.space.InnerRecord', field.type.fullname | |
+ assert_equal 'my.name.space', field.type.namespace | |
end | |
end | |
@@ -71,13 +75,13 @@ class TestSchema < Test::Unit::TestCase | |
]} | |
SCHEMA | |
- assert_equal schema.name, 'OuterRecord' | |
- assert_equal schema.fullname, 'my.name.space.OuterRecord' | |
- assert_equal schema.namespace, 'my.name.space' | |
+ assert_equal 'OuterRecord', schema.name | |
+ assert_equal 'my.name.space.OuterRecord', schema.fullname | |
+ assert_equal 'my.name.space', schema.namespace | |
schema.fields.each do |field| | |
- assert_equal field.type.name, 'InnerEnum' | |
- assert_equal field.type.fullname, 'my.name.space.InnerEnum' | |
- assert_equal field.type.namespace, 'my.name.space' | |
+ assert_equal 'InnerEnum', field.type.name | |
+ assert_equal 'my.name.space.InnerEnum', field.type.fullname | |
+ assert_equal 'my.name.space', field.type.namespace | |
end | |
end | |
@@ -96,18 +100,18 @@ class TestSchema < Test::Unit::TestCase | |
]} | |
SCHEMA | |
- assert_equal schema.name, 'OuterRecord' | |
- assert_equal schema.fullname, 'outer.OuterRecord' | |
- assert_equal schema.namespace, 'outer' | |
+ assert_equal 'OuterRecord', schema.name | |
+ assert_equal 'outer.OuterRecord', schema.fullname | |
+ assert_equal 'outer', schema.namespace | |
middle = schema.fields.first.type | |
- assert_equal middle.name, 'MiddleRecord' | |
- assert_equal middle.fullname, 'middle.MiddleRecord' | |
- assert_equal middle.namespace, 'middle' | |
+ assert_equal 'MiddleRecord', middle.name | |
+ assert_equal 'middle.MiddleRecord', middle.fullname | |
+ assert_equal 'middle', middle.namespace | |
inner = middle.fields.first.type | |
- assert_equal inner.name, 'InnerRecord' | |
- assert_equal inner.fullname, 'middle.InnerRecord' | |
- assert_equal inner.namespace, 'middle' | |
- assert_equal inner.fields.first.type, middle | |
+ assert_equal 'InnerRecord', inner.name | |
+ assert_equal 'middle.InnerRecord', inner.fullname | |
+ assert_equal 'middle', inner.namespace | |
+ assert_equal middle, inner.fields.first.type | |
end | |
def test_to_avro_includes_namespaces | |
@@ -120,7 +124,7 @@ class TestSchema < Test::Unit::TestCase | |
]} | |
SCHEMA | |
- assert_equal schema.to_avro, { | |
+ assert_equal({ | |
'type' => 'record', 'name' => 'OuterRecord', 'namespace' => 'my.name.space', | |
'fields' => [ | |
{'name' => 'definition', 'type' => { | |
@@ -129,6 +133,21 @@ class TestSchema < Test::Unit::TestCase | |
}}, | |
{'name' => 'reference', 'type' => 'my.name.space.InnerFixed'} | |
] | |
+ }, schema.to_avro) | |
+ end | |
+ | |
+ def test_to_avro_includes_logical_type | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type": "record", "name": "has_logical", "fields": [ | |
+ {"name": "dt", "type": {"type": "int", "logicalType": "date"}}] | |
+ } | |
+ SCHEMA | |
+ | |
+ assert_equal schema.to_avro, { | |
+ 'type' => 'record', 'name' => 'has_logical', | |
+ 'fields' => [ | |
+ {'name' => 'dt', 'type' => {'type' => 'int', 'logicalType' => 'date'}} | |
+ ] | |
} | |
end | |
@@ -143,4 +162,298 @@ class TestSchema < Test::Unit::TestCase | |
assert_equal '"MissingType" is not a schema we know about.', error.message | |
end | |
+ | |
+ def test_to_avro_handles_falsey_defaults | |
+ schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type": "record", "name": "Record", "namespace": "my.name.space", | |
+ "fields": [ | |
+ {"name": "is_usable", "type": "boolean", "default": false} | |
+ ] | |
+ } | |
+ SCHEMA | |
+ # A `false` default must survive to_avro; expected value is passed first (expected, actual). | |
+ assert_equal({ | |
+ 'type' => 'record', 'name' => 'Record', 'namespace' => 'my.name.space', | |
+ 'fields' => [ | |
+ {'name' => 'is_usable', 'type' => 'boolean', 'default' => false} | |
+ ] | |
+ }, schema.to_avro) | |
+ end | |
+ | |
+ def test_record_field_doc_attribute | |
+ field_schema_json = Avro::Schema.parse <<-SCHEMA | |
+ { | |
+ "type": "record", | |
+ "name": "Record", | |
+ "namespace": "my.name.space", | |
+ "fields": [ | |
+ { | |
+ "name": "name", | |
+ "type": "boolean", | |
+ "doc": "documentation" | |
+ } | |
+ ] | |
+ } | |
+ SCHEMA | |
+ | |
+ field_schema_hash = | |
+ { | |
+ 'type' => 'record', | |
+ 'name' => 'Record', | |
+ 'namespace' => 'my.name.space', | |
+ 'fields' => [ | |
+ { | |
+ 'name' => 'name', | |
+ 'type' => 'boolean', | |
+ 'doc' => 'documentation' | |
+ } | |
+ ] | |
+ } | |
+ | |
+ assert_equal field_schema_hash, field_schema_json.to_avro | |
+ end | |
+ | |
+ def test_record_doc_attribute | |
+ record_schema_json = Avro::Schema.parse <<-SCHEMA | |
+ { | |
+ "type": "record", | |
+ "name": "Record", | |
+ "namespace": "my.name.space", | |
+ "doc": "documentation", | |
+ "fields": [ | |
+ { | |
+ "name": "name", | |
+ "type": "boolean" | |
+ } | |
+ ] | |
+ } | |
+ SCHEMA | |
+ | |
+ record_schema_hash = | |
+ { | |
+ 'type' => 'record', | |
+ 'name' => 'Record', | |
+ 'namespace' => 'my.name.space', | |
+ 'doc' => 'documentation', | |
+ 'fields' => [ | |
+ { | |
+ 'name' => 'name', | |
+ 'type' => 'boolean' | |
+ } | |
+ ] | |
+ } | |
+ | |
+ assert_equal record_schema_hash, record_schema_json.to_avro | |
+ end | |
+ | |
+ def test_enum_doc_attribute | |
+ enum_schema_json = Avro::Schema.parse <<-SCHEMA | |
+ { | |
+ "type": "enum", | |
+ "name": "Enum", | |
+ "namespace": "my.name.space", | |
+ "doc": "documentation", | |
+ "symbols" : [ | |
+ "SPADES", | |
+ "HEARTS", | |
+ "DIAMONDS", | |
+ "CLUBS" | |
+ ] | |
+ } | |
+ SCHEMA | |
+ | |
+ enum_schema_hash = | |
+ { | |
+ 'type' => 'enum', | |
+ 'name' => 'Enum', | |
+ 'namespace' => 'my.name.space', | |
+ 'doc' => 'documentation', | |
+ 'symbols' => [ | |
+ 'SPADES', | |
+ 'HEARTS', | |
+ 'DIAMONDS', | |
+ 'CLUBS' | |
+ ] | |
+ } | |
+ assert_equal enum_schema_hash, enum_schema_json.to_avro | |
+ end | |
+ | |
+ def test_empty_record | |
+ schema = Avro::Schema.parse('{"type":"record", "name":"Empty"}') | |
+ assert_empty(schema.fields) | |
+ end | |
+ | |
+ def test_empty_union | |
+ schema = Avro::Schema.parse('[]') | |
+ assert_equal('[]', schema.to_s) # expected first, matching assert_equal(expected, actual) | |
+ end | |
+ | |
+ def test_read | |
+ schema = Avro::Schema.parse('"string"') | |
+ writer_schema = Avro::Schema.parse('"int"') | |
+ assert_false(schema.read?(writer_schema)) | |
+ assert_true(schema.read?(schema)) | |
+ end | |
+ | |
+ def test_be_read | |
+ schema = Avro::Schema.parse('"string"') | |
+ writer_schema = Avro::Schema.parse('"int"') | |
+ assert_false(schema.be_read?(writer_schema)) | |
+ assert_true(schema.be_read?(schema)) | |
+ end | |
+ | |
+ def test_mutual_read | |
+ schema = Avro::Schema.parse('"string"') | |
+ writer_schema = Avro::Schema.parse('"int"') | |
+ default1 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name":"i", "type":"int", "default": 1}]}') | |
+ default2 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name":"s", "type":"string", "default": ""}]}') | |
+ assert_false(schema.mutual_read?(writer_schema)) | |
+ assert_true(schema.mutual_read?(schema)) | |
+ assert_true(default1.mutual_read?(default2)) # each record's only field has a default | |
+ end | |
+ | |
+ def test_validate_defaults | |
+ exception = assert_raise(Avro::SchemaParseError) do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: 'string', | |
+ default: nil | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ assert_equal('Error validating default for veggies: at . expected type string, got null', | |
+ exception.to_s) | |
+ end | |
+ | |
+ def test_field_default_validation_disabled | |
+ Avro.disable_field_default_validation = true | |
+ assert_nothing_raised do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: 'string', | |
+ default: nil | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ ensure | |
+ Avro.disable_field_default_validation = false | |
+ end | |
+ | |
+ def test_field_default_validation_disabled_via_env | |
+ Avro.disable_field_default_validation = false | |
+ ENV['AVRO_DISABLE_FIELD_DEFAULT_VALIDATION'] = "1" | |
+ | |
+ assert_nothing_raised do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: 'string', | |
+ default: nil | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ ensure | |
+ ENV.delete('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION') | |
+ Avro.disable_field_default_validation = false | |
+ end | |
+ | |
+ def test_validate_record_valid_default | |
+ assert_nothing_raised(Avro::SchemaParseError) do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'with_subrecord', | |
+ fields: [ | |
+ { | |
+ name: 'sub', | |
+ type: { | |
+ name: 'subrecord', | |
+ type: 'record', | |
+ fields: [ | |
+ { type: 'string', name: 'x' } | |
+ ] | |
+ }, | |
+ default: { | |
+ x: "y" | |
+ } | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ end | |
+ | |
+ def test_validate_record_invalid_default | |
+ exception = assert_raise(Avro::SchemaParseError) do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'with_subrecord', | |
+ fields: [ | |
+ { | |
+ name: 'sub', | |
+ type: { | |
+ name: 'subrecord', | |
+ type: 'record', | |
+ fields: [ | |
+ { type: 'string', name: 'x' } | |
+ ] | |
+ }, | |
+ default: { | |
+ a: 1 | |
+ } | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ assert_equal('Error validating default for sub: at .x expected type string, got null', | |
+ exception.to_s) | |
+ end | |
+ | |
+ def test_validate_union_defaults | |
+ exception = assert_raise(Avro::SchemaParseError) do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: %w(string null), | |
+ default: 5 | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ assert_equal('Error validating default for veggies: at . expected type string, got int with value 5', | |
+ exception.to_s) | |
+ end | |
+ | |
+ def test_validate_union_default_first_type | |
+ exception = assert_raise(Avro::SchemaParseError) do | |
+ hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: %w(null string), | |
+ default: 'apple' | |
+ } | |
+ ] | |
+ ) | |
+ end | |
+ assert_equal('Error validating default for veggies: at . expected type null, got string with value "apple"', | |
+ exception.to_s) | |
+ end | |
end | |
diff --git a/lang/ruby/test/test_schema_compatibility.rb b/lang/ruby/test/test_schema_compatibility.rb | |
new file mode 100644 | |
index 00000000..4ec60ef7 | |
--- /dev/null | |
+++ b/lang/ruby/test/test_schema_compatibility.rb | |
@@ -0,0 +1,475 @@ | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+ | |
+require 'test_help' | |
+ | |
+class TestSchemaCompatibility < Test::Unit::TestCase | |
+ | |
+ def test_primitive_schema_compatibility | |
+ Avro::Schema::PRIMITIVE_TYPES.each do |schema_type| | |
+ assert_true(can_read?(send("#{schema_type}_schema"), send("#{schema_type}_schema"))) | |
+ end | |
+ end | |
+ | |
+ def test_compatible_reader_writer_pairs | |
+ [ | |
+ long_schema, int_schema, | |
+ float_schema, int_schema, | |
+ float_schema, long_schema, | |
+ double_schema, long_schema, | |
+ double_schema, int_schema, | |
+ double_schema, float_schema, | |
+ | |
+ int_array_schema, int_array_schema, | |
+ long_array_schema, int_array_schema, | |
+ int_map_schema, int_map_schema, | |
+ long_map_schema, int_map_schema, | |
+ | |
+ enum1_ab_schema, enum1_ab_schema, | |
+ enum1_abc_schema, enum1_ab_schema, | |
+ | |
+ string_schema, bytes_schema, | |
+ bytes_schema, string_schema, | |
+ | |
+ empty_union_schema, empty_union_schema, | |
+ int_union_schema, int_union_schema, | |
+ int_string_union_schema, string_int_union_schema, | |
+ int_union_schema, empty_union_schema, | |
+ long_union_schema, int_union_schema, | |
+ | |
+ int_union_schema, int_schema, | |
+ int_schema, int_union_schema, | |
+ | |
+ empty_record1_schema, empty_record1_schema, | |
+ empty_record1_schema, a_int_record1_schema, | |
+ | |
+ a_int_record1_schema, a_int_record1_schema, | |
+ a_dint_record1_schema, a_int_record1_schema, | |
+ a_dint_record1_schema, a_dint_record1_schema, | |
+ a_int_record1_schema, a_dint_record1_schema, | |
+ | |
+ a_long_record1_schema, a_int_record1_schema, | |
+ | |
+ a_int_record1_schema, a_int_b_int_record1_schema, | |
+ a_dint_record1_schema, a_int_b_int_record1_schema, | |
+ | |
+ a_int_b_dint_record1_schema, a_int_record1_schema, | |
+ a_dint_b_dint_record1_schema, empty_record1_schema, | |
+ a_dint_b_dint_record1_schema, a_int_record1_schema, | |
+ a_int_b_int_record1_schema, a_dint_b_dint_record1_schema, | |
+ | |
+ int_list_record_schema, int_list_record_schema, | |
+ long_list_record_schema, long_list_record_schema, | |
+ long_list_record_schema, int_list_record_schema, | |
+ | |
+ null_schema, null_schema, | |
+ | |
+ nested_optional_record, nested_record | |
+ ].each_slice(2) do |(reader, writer)| | |
+ assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}") | |
+ end | |
+ end | |
+ | |
+ def test_broken | |
+ assert_false(can_read?(int_string_union_schema, int_union_schema)) | |
+ end | |
+ | |
+ def test_incompatible_reader_writer_pairs | |
+ [ | |
+ null_schema, int_schema, | |
+ null_schema, long_schema, | |
+ | |
+ boolean_schema, int_schema, | |
+ | |
+ int_schema, null_schema, | |
+ int_schema, boolean_schema, | |
+ int_schema, long_schema, | |
+ int_schema, float_schema, | |
+ int_schema, double_schema, | |
+ | |
+ long_schema, float_schema, | |
+ long_schema, double_schema, | |
+ | |
+ float_schema, double_schema, | |
+ | |
+ string_schema, boolean_schema, | |
+ string_schema, int_schema, | |
+ | |
+ bytes_schema, null_schema, | |
+ bytes_schema, int_schema, | |
+ | |
+ int_array_schema, long_array_schema, | |
+ int_map_schema, int_array_schema, | |
+ int_array_schema, int_map_schema, | |
+ int_map_schema, long_map_schema, | |
+ | |
+ enum1_ab_schema, enum1_abc_schema, | |
+ enum1_bc_schema, enum1_abc_schema, | |
+ | |
+ enum1_ab_schema, enum2_ab_schema, | |
+ int_schema, enum2_ab_schema, | |
+ enum2_ab_schema, int_schema, | |
+ | |
+ int_union_schema, int_string_union_schema, | |
+ string_union_schema, int_string_union_schema, | |
+ | |
+ empty_record2_schema, empty_record1_schema, | |
+ a_int_record1_schema, empty_record1_schema, | |
+ a_int_b_dint_record1_schema, empty_record1_schema, | |
+ | |
+ int_list_record_schema, long_list_record_schema, | |
+ | |
+ null_schema, int_schema, | |
+ | |
+ nested_record, nested_optional_record | |
+ ].each_slice(2) do |(reader, writer)| | |
+ assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}") | |
+ end | |
+ end | |
+ | |
+ def writer_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield1", "type":"int"}, | |
+ {"name":"oldfield2", "type":"string"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def test_missing_field | |
+ reader_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield1", "type":"int"} | |
+ ]} | |
+ SCHEMA | |
+ assert_true(can_read?(writer_schema, reader_schema)) | |
+ assert_false(can_read?(reader_schema, writer_schema)) | |
+ end | |
+ | |
+ def test_missing_second_field | |
+ reader_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield2", "type":"string"} | |
+ ]} | |
+ SCHEMA | |
+ assert_true(can_read?(writer_schema, reader_schema)) | |
+ assert_false(can_read?(reader_schema, writer_schema)) | |
+ end | |
+ | |
+ def test_all_fields | |
+ reader_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield1", "type":"int"}, | |
+ {"name":"oldfield2", "type":"string"} | |
+ ]} | |
+ SCHEMA | |
+ assert_true(can_read?(writer_schema, reader_schema)) | |
+ assert_true(can_read?(reader_schema, writer_schema)) | |
+ end | |
+ | |
+ def test_new_field_with_default | |
+ reader_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield1", "type":"int"}, | |
+ {"name":"newfield1", "type":"int", "default":42} | |
+ ]} | |
+ SCHEMA | |
+ assert_true(can_read?(writer_schema, reader_schema)) | |
+ assert_false(can_read?(reader_schema, writer_schema)) | |
+ end | |
+ | |
+ def test_new_field | |
+ reader_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Record", "fields":[ | |
+ {"name":"oldfield1", "type":"int"}, | |
+ {"name":"newfield1", "type":"int"} | |
+ ]} | |
+ SCHEMA | |
+ assert_false(can_read?(writer_schema, reader_schema)) | |
+ assert_false(can_read?(reader_schema, writer_schema)) | |
+ end | |
+ | |
+ def test_array_writer_schema | |
+ valid_reader = string_array_schema | |
+ invalid_reader = string_map_schema | |
+ | |
+ assert_true(can_read?(string_array_schema, valid_reader)) | |
+ assert_false(can_read?(string_array_schema, invalid_reader)) | |
+ end | |
+ | |
+ def test_primitive_writer_schema | |
+ valid_reader = string_schema | |
+ assert_true(can_read?(string_schema, valid_reader)) | |
+ assert_false(can_read?(int_schema, string_schema)) | |
+ end | |
+ | |
+ def test_union_reader_writer_subset_incompatiblity | |
+ # reader union schema must contain all writer union branches | |
+ union_writer = union_schema(int_schema, string_schema) | |
+ union_reader = union_schema(string_schema) | |
+ | |
+ assert_false(can_read?(union_writer, union_reader)) | |
+ assert_true(can_read?(union_reader, union_writer)) | |
+ end | |
+ | |
+ def test_incompatible_record_field | |
+ string_schema = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ | |
+ {"name":"field1", "type":"string"} | |
+ ]} | |
+ SCHEMA | |
+ int_schema = Avro::Schema.parse <<-SCHEMA2 | |
+ {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ | |
+ {"name":"field1", "type":"int"} | |
+ ]} | |
+ SCHEMA2 | |
+ assert_false(can_read?(string_schema, int_schema)) | |
+ end | |
+ | |
+ def test_enum_symbols | |
+ enum_schema1 = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"enum", "name":"MyEnum", "symbols":["A","B"]} | |
+ SCHEMA | |
+ enum_schema2 = Avro::Schema.parse <<-SCHEMA | |
+ {"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]} | |
+ SCHEMA | |
+ assert_false(can_read?(enum_schema2, enum_schema1)) | |
+ assert_true(can_read?(enum_schema1, enum_schema2)) | |
+ end | |
+ | |
+ # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java | |
+ | |
+ def point_2d_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Point2D", "fields":[ | |
+ {"name":"x", "type":"double"}, | |
+ {"name":"y", "type":"double"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def point_2d_fullname_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Point", "namespace":"written", "fields":[ | |
+ {"name":"x", "type":"double"}, | |
+ {"name":"y", "type":"double"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def point_3d_no_default_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Point", "fields":[ | |
+ {"name":"x", "type":"double"}, | |
+ {"name":"y", "type":"double"}, | |
+ {"name":"z", "type":"double"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def point_3d_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Point3D", "fields":[ | |
+ {"name":"x", "type":"double"}, | |
+ {"name":"y", "type":"double"}, | |
+ {"name":"z", "type":"double", "default": 0.0} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def point_3d_match_name_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ {"type":"record", "name":"Point", "fields":[ | |
+ {"name":"x", "type":"double"}, | |
+ {"name":"y", "type":"double"}, | |
+ {"name":"z", "type":"double", "default": 0.0} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def test_union_resolution_no_structure_match | |
+ # short name match, but no structure match | |
+ read_schema = union_schema(null_schema, point_3d_no_default_schema) | |
+ assert_false(can_read?(point_2d_fullname_schema, read_schema)) | |
+ end | |
+ | |
+ def test_union_resolution_first_structure_match_2d | |
+ # multiple structure matches with no name matches | |
+ read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema) | |
+ assert_false(can_read?(point_2d_fullname_schema, read_schema)) | |
+ end | |
+ | |
+ def test_union_resolution_first_structure_match_3d | |
+ # multiple structure matches with no name matches | |
+ read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema) | |
+ assert_false(can_read?(point_2d_fullname_schema, read_schema)) | |
+ end | |
+ | |
+ def test_union_resolution_named_structure_match | |
+ # multiple structure matches with a short name match | |
+ read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema) | |
+ assert_false(can_read?(point_2d_fullname_schema, read_schema)) | |
+ end | |
+ | |
+ def test_union_resolution_full_name_match | |
+ # there is a full name match that should be chosen | |
+ read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema) | |
+ assert_true(can_read?(point_2d_fullname_schema, read_schema)) | |
+ end | |
+ | |
+ def can_read?(writer, reader) | |
+ Avro::SchemaCompatibility.can_read?(writer, reader) | |
+ end | |
+ | |
+ def union_schema(*schemas) | |
+ # Builds a union schema from the given member schemas; with no arguments the splat is [] and this parses "[]". | |
+ Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]") | |
+ end | |
+ | |
+ Avro::Schema::PRIMITIVE_TYPES.each do |schema_type| | |
+ define_method("#{schema_type}_schema") do | |
+ Avro::Schema.parse("\"#{schema_type}\"") | |
+ end | |
+ end | |
+ | |
+ def int_array_schema | |
+ Avro::Schema.parse('{"type":"array", "items":"int"}') | |
+ end | |
+ | |
+ def long_array_schema | |
+ Avro::Schema.parse('{"type":"array", "items":"long"}') | |
+ end | |
+ | |
+ def string_array_schema | |
+ Avro::Schema.parse('{"type":"array", "items":"string"}') | |
+ end | |
+ | |
+ def int_map_schema | |
+ Avro::Schema.parse('{"type":"map", "values":"int"}') | |
+ end | |
+ | |
+ def long_map_schema | |
+ Avro::Schema.parse('{"type":"map", "values":"long"}') | |
+ end | |
+ | |
+ def string_map_schema | |
+ Avro::Schema.parse('{"type":"map", "values":"string"}') | |
+ end | |
+ | |
+ def enum1_ab_schema | |
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}') | |
+ end | |
+ | |
+ def enum1_abc_schema | |
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}') | |
+ end | |
+ | |
+ def enum1_bc_schema | |
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}') | |
+ end | |
+ | |
+ def enum2_ab_schema | |
+ Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}') | |
+ end | |
+ | |
+ def empty_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1"}') | |
+ end | |
+ | |
+ def empty_record2_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record2"}') | |
+ end | |
+ | |
+ def a_int_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}') | |
+ end | |
+ | |
+ def a_long_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}') | |
+ end | |
+ | |
+ def a_int_b_int_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}') | |
+ end | |
+ | |
+ def a_dint_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}') | |
+ end | |
+ | |
+ def a_int_b_dint_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}') | |
+ end | |
+ | |
+ def a_dint_b_dint_record1_schema | |
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}') | |
+ end | |
+ | |
+ def nested_record | |
+ Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}') | |
+ end | |
+ | |
+ def nested_optional_record | |
+ Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}') | |
+ end | |
+ | |
+ def int_list_record_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ { | |
+ "type":"record", "name":"List", "fields": [ | |
+ {"name": "head", "type": "int"}, | |
+ {"name": "tail", "type": "List"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def long_list_record_schema | |
+ Avro::Schema.parse <<-SCHEMA | |
+ { | |
+ "type":"record", "name":"List", "fields": [ | |
+ {"name": "head", "type": "long"}, | |
+ {"name": "tail", "type": "List"} | |
+ ]} | |
+ SCHEMA | |
+ end | |
+ | |
+ def empty_union_schema | |
+ union_schema | |
+ end | |
+ | |
+ def null_union_schema | |
+ union_schema(null_schema) | |
+ end | |
+ | |
+ def int_union_schema | |
+ union_schema(int_schema) | |
+ end | |
+ | |
+ def long_union_schema | |
+ union_schema(long_schema) | |
+ end | |
+ | |
+ def string_union_schema | |
+ union_schema(string_schema) | |
+ end | |
+ | |
+ def int_string_union_schema | |
+ union_schema(int_schema, string_schema) | |
+ end | |
+ | |
+ def string_int_union_schema | |
+ union_schema(string_schema, int_schema) | |
+ end | |
+end | |
diff --git a/lang/ruby/test/test_schema_validator.rb b/lang/ruby/test/test_schema_validator.rb | |
new file mode 100644 | |
index 00000000..0126c354 | |
--- /dev/null | |
+++ b/lang/ruby/test/test_schema_validator.rb | |
@@ -0,0 +1,554 @@ | |
+# Licensed to the Apache Software Foundation (ASF) under one | |
+# or more contributor license agreements. See the NOTICE file | |
+# distributed with this work for additional information | |
+# regarding copyright ownership. The ASF licenses this file | |
+# to you under the Apache License, Version 2.0 (the | |
+# "License"); you may not use this file except in compliance | |
+# with the License. You may obtain a copy of the License at | |
+# | |
+# http://www.apache.org/licenses/LICENSE-2.0 | |
+# | |
+# Unless required by applicable law or agreed to in writing, software | |
+# distributed under the License is distributed on an "AS IS" BASIS, | |
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
+# See the License for the specific language governing permissions and | |
+# limitations under the License. | |
+ | |
+require 'test_help' | |
+ | |
+class TestSchema < Test::Unit::TestCase | |
+ def validate!(schema, value, options=nil) | |
+ Avro::SchemaValidator.validate!(schema, value, options) | |
+ end | |
+ | |
+ def validate_simple!(schema, value) | |
+ Avro::SchemaValidator.validate!(schema, value, recursive: false) | |
+ end | |
+ | |
+ def hash_to_schema(hash) | |
+ Avro::Schema.parse(hash.to_json) | |
+ end | |
+ | |
+ def assert_failed_validation(messages) | |
+ error = assert_raise(Avro::SchemaValidator::ValidationError) { yield } | |
+ | |
+ assert_messages = [messages].flatten | |
+ result_errors = error.result.errors | |
+ assert_messages.each do |message| | |
+ assert(result_errors.include?(message), "expected '#{message}' to be in '#{result_errors}'") | |
+ end | |
+ assert_equal(assert_messages.size, result_errors.size) | |
+ end | |
+ | |
+ def assert_valid_schema(schema, valid, invalid, simple = false) | |
+ valid.each do |value| | |
+ assert_nothing_raised { Avro::SchemaValidator.validate!(schema, value) } | |
+ assert_nothing_raised { Avro::SchemaValidator.validate!(schema, value, recursive: false) } if simple | |
+ end | |
+ | |
+ invalid.each do |value| | |
+ assert_raise { Avro::SchemaValidator.validate!(schema, value) } | |
+ assert_raise { Avro::SchemaValidator.validate!(schema, value, recursive: false) } if simple | |
+ assert_nothing_raised { Avro::SchemaValidator.validate!(schema, value, recursive: false) } unless simple | |
+ end | |
+ end | |
+ | |
+ def test_validate_nil | |
+ schema = hash_to_schema(type: 'null', name: 'name') | |
+ | |
+ assert_nothing_raised { validate!(schema, nil) } | |
+ assert_nothing_raised { validate_simple!(schema, nil) } | |
+ | |
+ assert_failed_validation('at . expected type null, got int with value 1') do | |
+ validate!(schema, 1) | |
+ end | |
+ | |
+ assert_failed_validation('at . expected type null, got int with value 1') do | |
+ validate_simple!(schema, 1) | |
+ end | |
+ end | |
+ | |
+ def test_validate_boolean | |
+ schema = hash_to_schema(type: 'boolean', name: 'name') | |
+ | |
+ assert_nothing_raised { validate!(schema, true) } | |
+ assert_nothing_raised { validate!(schema, false) } | |
+ assert_nothing_raised { validate_simple!(schema, true) } | |
+ assert_nothing_raised { validate_simple!(schema, false) } | |
+ | |
+ assert_failed_validation('at . expected type boolean, got int with value 1') do | |
+ validate!(schema, 1) | |
+ end | |
+ assert_failed_validation('at . expected type boolean, got int with value 1') do | |
+ validate_simple!(schema, 1) | |
+ end | |
+ | |
+ assert_failed_validation('at . expected type boolean, got null') do | |
+ validate!(schema, nil) | |
+ end | |
+ assert_failed_validation('at . expected type boolean, got null') do | |
+ validate_simple!(schema, nil) | |
+ end | |
+ end | |
+ | |
+ def test_fixed_size_string | |
+ schema = hash_to_schema(type: 'fixed', name: 'some', size: 3) | |
+ | |
+ assert_nothing_raised { validate!(schema, 'baf') } | |
+ assert_nothing_raised { validate_simple!(schema, 'baf') } | |
+ | |
+ assert_failed_validation('at . expected fixed with size 3, got "some" with size 4') do | |
+ validate!(schema, 'some') | |
+ end | |
+ assert_failed_validation('at . expected fixed with size 3, got "some" with size 4') do | |
+ validate_simple!(schema, 'some') | |
+ end | |
+ | |
+ assert_failed_validation('at . expected fixed with size 3, got null') do | |
+ validate!(schema, nil) | |
+ end | |
+ assert_failed_validation('at . expected fixed with size 3, got null') do | |
+ validate_simple!(schema, nil) | |
+ end | |
+ | |
+ assert_failed_validation("at . expected fixed with size 3, got \"a\u2014b\" with size 5") do | |
+ validate!(schema, "a\u2014b") | |
+ end | |
+ assert_failed_validation("at . expected fixed with size 3, got \"a\u2014b\" with size 5") do | |
+ validate_simple!(schema, "a\u2014b") | |
+ end | |
+ end | |
+ | |
+ def test_original_validate_nil | |
+ schema = hash_to_schema(type: 'null', name: 'name') | |
+ | |
+ assert_valid_schema(schema, [nil], ['something'], true) | |
+ end | |
+ | |
+ def test_original_validate_boolean | |
+ schema = hash_to_schema(type: 'boolean', name: 'name') | |
+ | |
+ assert_valid_schema(schema, [true, false], [nil, 1], true) | |
+ end | |
+ | |
+ def test_validate_string | |
+ schema = hash_to_schema(type: 'string', name: 'name') | |
+ | |
+ assert_valid_schema(schema, ['string'], [nil, 1], true) | |
+ end | |
+ | |
+ def test_validate_bytes | |
+ schema = hash_to_schema(type: 'bytes', name: 'name') | |
+ | |
+ assert_valid_schema(schema, ['string'], [nil, 1], true) | |
+ end | |
+ | |
+ def test_validate_int | |
+ schema = hash_to_schema(type: 'int', name: 'name') | |
+ | |
+ assert_valid_schema( | |
+ schema, | |
+ [Avro::Schema::INT_MIN_VALUE, Avro::Schema::INT_MAX_VALUE, 1], | |
+ [Avro::Schema::LONG_MIN_VALUE, Avro::Schema::LONG_MAX_VALUE, 'string'], | |
+ true | |
+ ) | |
+ assert_failed_validation('at . out of bound value 9223372036854775807') do | |
+ validate!(schema, Avro::Schema::LONG_MAX_VALUE) | |
+ end | |
+ assert_failed_validation('at . out of bound value 9223372036854775807') do | |
+ validate_simple!(schema, Avro::Schema::LONG_MAX_VALUE) | |
+ end | |
+ end | |
+ | |
+ def test_validate_long | |
+ schema = hash_to_schema(type: 'long', name: 'name') | |
+ | |
+ assert_valid_schema(schema, [Avro::Schema::LONG_MIN_VALUE, Avro::Schema::LONG_MAX_VALUE, 1], [1.1, 'string'], true) | |
+ end | |
+ | |
+ def test_validate_float | |
+ schema = hash_to_schema(type: 'float', name: 'name') | |
+ | |
+ assert_valid_schema(schema, [1.1, 1, Avro::Schema::LONG_MAX_VALUE], ['string'], true) | |
+ end | |
+ | |
+ def test_validate_double | |
+ schema = hash_to_schema(type: 'double', name: 'name') | |
+ | |
+ assert_valid_schema(schema, [1.1, 1, Avro::Schema::LONG_MAX_VALUE], ['string'], true) | |
+ end | |
+ | |
+ def test_validate_fixed | |
+ schema = hash_to_schema(type: 'fixed', name: 'name', size: 3) | |
+ | |
+ assert_valid_schema(schema, ['abc'], ['ab', 1, 1.1, true], true) | |
+ end | |
+ | |
+ def test_validate_original_num | |
+ schema = hash_to_schema(type: 'enum', name: 'name', symbols: %w(a b)) | |
+ | |
+ assert_valid_schema(schema, ['a', 'b'], ['c'], true) | |
+ end | |
+ | |
+ def test_validate_record | |
+ schema = hash_to_schema(type: 'record', name: 'name', fields: [{ type: 'null', name: 'sub' }]) | |
+ | |
+ assert_valid_schema(schema, [{ 'sub' => nil }], [{ 'sub' => 1 }]) | |
+ end | |
+ | |
+ def test_validate_shallow_record | |
+ schema = hash_to_schema( | |
+ type: 'record', name: 'name', fields: [{ type: 'int', name: 'sub' }] | |
+ ) | |
+ | |
+ assert_nothing_raised { validate!(schema, 'sub' => 1) } | |
+ assert_nothing_raised { validate_simple!(schema, 'sub' => 1) } | |
+ | |
+ assert_failed_validation('at .sub expected type int, got null') do | |
+ validate!(schema, {}) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, {}) } | |
+ | |
+ assert_failed_validation('at . expected type record, got float with value 1.2') do | |
+ validate!(schema, 1.2) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 1.2) } | |
+ | |
+ assert_failed_validation('at .sub expected type int, got float with value 1.2') do | |
+ validate!(schema, 'sub' => 1.2) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'sub' => 1.2) } | |
+ end | |
+ | |
+ def test_validate_array | |
+ schema = hash_to_schema(type: 'array', | |
+ name: 'person', | |
+ items: [{ type: 'int', name: 'height' }]) | |
+ | |
+ assert_nothing_raised { validate!(schema, []) } | |
+ assert_nothing_raised { validate_simple!(schema, []) } | |
+ | |
+ assert_failed_validation 'at . expected type array, got null' do | |
+ validate!(schema, nil) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, nil) } | |
+ | |
+ assert_failed_validation('at .[0] expected type int, got null') do | |
+ validate!(schema, [nil]) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, [nil]) } | |
+ | |
+ assert_failed_validation('at .[3] expected type int, got string with value "so wrong"') do | |
+ validate!(schema, [1, 3, 9, 'so wrong']) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, [1, 3, 9, 'so wrong']) } | |
+ end | |
+ | |
+ def test_validate_enum | |
+ schema = hash_to_schema(type: 'enum', | |
+ name: 'person', | |
+ symbols: %w(one two three)) | |
+ | |
+ assert_nothing_raised { validate!(schema, 'one') } | |
+ assert_nothing_raised { validate_simple!(schema, 'one') } | |
+ | |
+ assert_failed_validation('at . expected enum with values ["one", "two", "three"], got string with value "five"') do | |
+ validate!(schema, 'five') | |
+ end | |
+ assert_failed_validation('at . expected enum with values ["one", "two", "three"], got string with value "five"') do | |
+ validate_simple!(schema, 'five') | |
+ end | |
+ end | |
+ | |
+ def test_validate_union_on_primitive_types | |
+ schema = hash_to_schema( | |
+ name: 'should_not_matter', | |
+ type: 'record', | |
+ fields: [ | |
+ { name: 'what_ever', type: %w(long string) } | |
+ ] | |
+ ) | |
+ | |
+ assert_failed_validation('at .what_ever expected union of [\'long\', \'string\'], got null') { | |
+ validate!(schema, 'what_ever' => nil) | |
+ } | |
+ assert_nothing_raised { validate_simple!(schema, 'what_ever' => nil) } | |
+ end | |
+ | |
+ def test_validate_union_of_nil_and_record_inside_array | |
+ schema = hash_to_schema( | |
+ name: 'this does not matter', | |
+ type: 'record', | |
+ fields: [ | |
+ { | |
+ name: 'person', | |
+ type: { | |
+ name: 'person_entry', | |
+ type: 'record', | |
+ fields: [ | |
+ { | |
+ name: 'houses', | |
+ type: [ | |
+ 'null', | |
+ { | |
+ name: 'houses_entry', | |
+ type: 'array', | |
+ items: [ | |
+ { | |
+ name: 'house_entry', | |
+ type: 'record', | |
+ fields: [ | |
+ { name: 'number_of_rooms', type: 'long' } | |
+ ] | |
+ } | |
+ ] | |
+ } | |
+ ], | |
+ } | |
+ ] | |
+ } | |
+ } | |
+ ] | |
+ ) | |
+ | |
+ assert_failed_validation('at .person expected type record, got null') { | |
+ validate!(schema, 'not at all' => nil) | |
+ } | |
+ assert_nothing_raised { validate_simple!(schema, 'person' => {}) } | |
+ | |
+ assert_nothing_raised { validate!(schema, 'person' => {}) } | |
+ assert_nothing_raised { validate!(schema, 'person' => { houses: [] }) } | |
+ assert_nothing_raised { validate!(schema, 'person' => { 'houses' => [{ 'number_of_rooms' => 1 }] }) } | |
+ | |
+ assert_nothing_raised { validate_simple!(schema, 'person' => {}) } | |
+ assert_nothing_raised { validate_simple!(schema, 'person' => { houses: [] }) } | |
+ assert_nothing_raised { validate_simple!(schema, 'person' => { 'houses' => [{ 'number_of_rooms' => 1 }] }) } | |
+ | |
+ message = 'at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all"' | |
+ datum = { | |
+ 'person' => { | |
+ 'houses' => [ | |
+ { 'number_of_rooms' => 2 }, | |
+ { 'number_of_rooms' => 'not valid at all' } | |
+ ] | |
+ } | |
+ } | |
+ assert_failed_validation(message) { validate!(schema, datum) } | |
+ assert_nothing_raised { validate_simple!(schema, datum) } | |
+ end | |
+ | |
+ def test_validate_map | |
+ schema = hash_to_schema(type: 'map', | |
+ name: 'numbers', | |
+ values: [ | |
+ { name: 'some', type: 'int' } | |
+ ]) | |
+ | |
+ assert_nothing_raised { validate!(schema, 'some' => 1) } | |
+ assert_nothing_raised { validate_simple!(schema, 'some' => 1) } | |
+ | |
+ assert_failed_validation('at .some expected type int, got string with value "nope"') do | |
+ validate!(schema, 'some' => 'nope') | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'some' => 'nope')} | |
+ | |
+ assert_failed_validation("at . unexpected key type 'Symbol' in map") do | |
+ validate!(schema, some: 1) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, some: 1) } | |
+ | |
+ assert_failed_validation('at . expected type map, got null') do | |
+ validate!(schema, nil) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, nil) } | |
+ end | |
+ | |
+ def test_validate_deep_record | |
+ schema = hash_to_schema(type: 'record', | |
+ name: 'person', | |
+ fields: [ | |
+ { | |
+ name: 'head', | |
+ type: { | |
+ name: 'head', | |
+ type: 'record', | |
+ fields: [ | |
+ { | |
+ name: 'hair', | |
+ type: { | |
+ name: 'hair', | |
+ type: 'record', | |
+ fields: [ | |
+ { | |
+ name: 'color', | |
+ type: 'string' | |
+ } | |
+ ] | |
+ } | |
+ } | |
+ ] | |
+ } | |
+ } | |
+ ]) | |
+ | |
+ assert_nothing_raised { validate!(schema, 'head' => { 'hair' => { 'color' => 'black' } }) } | |
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => { 'color' => 'black' } }) } | |
+ | |
+ assert_failed_validation('at .head.hair.color expected type string, got null') do | |
+ validate!(schema, 'head' => { 'hair' => { 'color' => nil } }) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => { 'color' => nil } }) } | |
+ | |
+ assert_failed_validation('at .head.hair.color expected type string, got null') do | |
+ validate!(schema, 'head' => { 'hair' => {} }) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => {} }) } | |
+ | |
+ assert_failed_validation('at .head.hair expected type record, got null') do | |
+ validate!(schema, 'head' => {}) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'head' => {}) } | |
+ | |
+ assert_failed_validation('at . expected type record, got null') do | |
+ validate!(schema, nil) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, nil) } | |
+ end | |
+ | |
+ def test_validate_deep_record_with_array | |
+ schema = hash_to_schema(type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'fruits', | |
+ type: { | |
+ name: 'fruits', | |
+ type: 'array', | |
+ items: [ | |
+ { | |
+ name: 'fruit', | |
+ type: 'record', | |
+ fields: [ | |
+ { name: 'name', type: 'string' }, | |
+ { name: 'weight', type: 'float' } | |
+ ] | |
+ } | |
+ ] | |
+ } | |
+ } | |
+ ]) | |
+ assert_nothing_raised { validate!(schema, 'fruits' => [{ 'name' => 'apple', 'weight' => 30.2 }]) } | |
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => [{ 'name' => 'apple', 'weight' => 30.2 }]) } | |
+ | |
+ assert_failed_validation('at .fruits[0].name expected type string, got null') do | |
+ validate!(schema, 'fruits' => [{ 'name' => nil, 'weight' => 30.2 }]) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => [{ 'name' => nil, 'weight' => 30.2 }]) } | |
+ | |
+ assert_failed_validation('at .fruits expected type array, got int with value 1') do | |
+ validate!(schema, 'fruits' => 1) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => 1) } | |
+ end | |
+ | |
+ def test_validate_multiple_errors | |
+ schema = hash_to_schema(type: 'array', | |
+ name: 'ages', | |
+ items: [ | |
+ { type: 'int', name: 'age' } | |
+ ]) | |
+ | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, [nil, 'e']) | |
+ end | |
+ assert_nothing_raised { validate_simple!(schema, [nil, 'e']) } | |
+ assert_equal 2, exception.result.errors.size | |
+ assert_equal( | |
+ "at .[0] expected type int, got null\nat .[1] expected type int, got string with value \"e\"", | |
+ exception.to_s | |
+ ) | |
+ end | |
+ | |
+ def test_validate_extra_fields | |
+ schema = hash_to_schema( | |
+ type: 'record', | |
+ name: 'fruits', | |
+ fields: [ | |
+ { | |
+ name: 'veggies', | |
+ type: 'string' | |
+ } | |
+ ] | |
+ ) | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, {'veggies' => 'tomato', 'bread' => 'rye'}, fail_on_extra_fields: true) | |
+ end | |
+ assert_equal(1, exception.result.errors.size) | |
+ assert_equal("at . extra field 'bread' - not in schema", | |
+ exception.to_s) | |
+ end | |
+ | |
+ def test_validate_subrecord_extra_fields | |
+ schema = hash_to_schema(type: 'record', | |
+ name: 'top', | |
+ fields: [ | |
+ { | |
+ name: 'fruit', | |
+ type: { | |
+ name: 'fruit', | |
+ type: 'record', | |
+ fields: [{ name: 'name', type: 'string' }] | |
+ } | |
+ } | |
+ ]) | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, { 'fruit' => { 'name' => 'orange', 'color' => 'orange' } }, fail_on_extra_fields: true) | |
+ end | |
+ assert_equal(1, exception.result.errors.size) | |
+ assert_equal("at .fruit extra field 'color' - not in schema", exception.to_s) | |
+ end | |
+ | |
+ def test_validate_array_extra_fields | |
+ schema = hash_to_schema(type: 'array', | |
+ items: { | |
+ name: 'fruit', | |
+ type: 'record', | |
+ fields: [{ name: 'name', type: 'string' }] | |
+ }) | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, [{ 'name' => 'orange', 'color' => 'orange' }], fail_on_extra_fields: true) | |
+ end | |
+ assert_equal(1, exception.result.errors.size) | |
+ assert_equal("at .[0] extra field 'color' - not in schema", exception.to_s) | |
+ end | |
+ | |
+ def test_validate_map_extra_fields | |
+ schema = hash_to_schema(type: 'map', | |
+ values: { | |
+ name: 'fruit', | |
+ type: 'record', | |
+ fields: [{ name: 'color', type: 'string' }] | |
+ }) | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, { 'apple' => { 'color' => 'green', 'extra' => 1 } }, fail_on_extra_fields: true) | |
+ end | |
+ assert_equal(1, exception.result.errors.size) | |
+ assert_equal("at .apple extra field 'extra' - not in schema", exception.to_s) | |
+ end | |
+ | |
+ def test_validate_union_extra_fields | |
+ schema = hash_to_schema([ | |
+ 'null', | |
+ { | |
+ type: 'record', | |
+ name: 'fruit', | |
+ fields: [{ name: 'name', type: 'string' }] | |
+ } | |
+ ]) | |
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do | |
+ validate!(schema, { 'name' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true) | |
+ end | |
+ assert_equal(1, exception.result.errors.size) | |
+ assert_equal("at . extra field 'color' - not in schema", exception.to_s) | |
+ end | |
+end | |
diff --git a/lang/ruby/test/tool.rb b/lang/ruby/test/tool.rb | |
index 111b9d77..1a1f12ee 100644 | |
--- a/lang/ruby/test/tool.rb | |
+++ b/lang/ruby/test/tool.rb | |
@@ -42,7 +42,6 @@ class GenericHandler < WEBrick::HTTPServlet::AbstractServlet | |
writer = Avro::IPC::FramedWriter.new(StringIO.new) | |
writer.write_framed_message(unframed_resp) | |
resp.body = writer.to_s | |
- @server.stop | |
end | |
end | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment