Skip to content

Instantly share code, notes, and snippets.

@mlarraz
Last active May 29, 2019 23:12
Show Gist options
  • Save mlarraz/3e6bef9af50ba090085ca97551c6ec60 to your computer and use it in GitHub Desktop.
Save mlarraz/3e6bef9af50ba090085ca97551c6ec60 to your computer and use it in GitHub Desktop.
Avro Ruby diff (1.8.2 vs 1.9.0), extracted from https://github.com/apache/avro/compare/release-1.8.2...release-1.9.0
:100644 100644 3f9ddafa 7a61fe1f M lang/ruby/.gitignore
:100644 100644 20543da7 17161f52 M lang/ruby/Gemfile
:100644 100644 8008c46b 9fc48c2a M lang/ruby/Manifest
:100755 100755 8fea5656 a09617c3 M lang/ruby/build.sh
:100644 100644 c419ab1e 5a645663 M lang/ruby/lib/avro.rb
:100644 100644 c27c2dc1 e4650557 M lang/ruby/lib/avro/data_file.rb
:100644 100644 9dc41db4 31107217 M lang/ruby/lib/avro/io.rb
:100644 100644 1ac81294 f0816e83 M lang/ruby/lib/avro/ipc.rb
:000000 100644 00000000 e1b219d7 A lang/ruby/lib/avro/logical_types.rb
:100644 100644 6c210e19 9f509dde M lang/ruby/lib/avro/protocol.rb
:100644 100644 87f6fa48 4a766fdc M lang/ruby/lib/avro/schema.rb
:000000 100644 00000000 37401fc0 A lang/ruby/lib/avro/schema_compatibility.rb
:100644 100644 0a5bee5d 1506fcac M lang/ruby/lib/avro/schema_normalization.rb
:000000 100644 00000000 f9274f06 A lang/ruby/lib/avro/schema_validator.rb
:100644 100644 de2d9c49 a666f338 M lang/ruby/test/case_finder.rb
:100644 100644 9d276f7d 54fa8781 M lang/ruby/test/random_data.rb
:100644 100644 8fe05d70 0f13df35 M lang/ruby/test/test_datafile.rb
:100644 100644 f8f3da13 350d48bc M lang/ruby/test/test_io.rb
:000000 100644 00000000 8cd6819b A lang/ruby/test/test_logical_types.rb
:100644 100644 fda38824 1b036fe5 M lang/ruby/test/test_protocol.rb
:100644 100644 0668cf2f ed1c8eef M lang/ruby/test/test_schema.rb
:000000 100644 00000000 4ec60ef7 A lang/ruby/test/test_schema_compatibility.rb
:000000 100644 00000000 0126c354 A lang/ruby/test/test_schema_validator.rb
:100644 100644 111b9d77 1a1f12ee M lang/ruby/test/tool.rb
diff --git a/lang/ruby/.gitignore b/lang/ruby/.gitignore
index 3f9ddafa..7a61fe1f 100644
--- a/lang/ruby/.gitignore
+++ b/lang/ruby/.gitignore
@@ -1,3 +1,9 @@
tmp
data.avr
Gemfile.lock
+.bundle/
+.gem/
+avro.gemspec
+pkg/
+.ruby-version
+.ruby-gemset
diff --git a/lang/ruby/Gemfile b/lang/ruby/Gemfile
index 20543da7..17161f52 100644
--- a/lang/ruby/Gemfile
+++ b/lang/ruby/Gemfile
@@ -18,3 +18,4 @@ gem 'rake'
gem 'echoe'
gem 'multi_json'
gem 'snappy'
+gem 'test-unit'
diff --git a/lang/ruby/Manifest b/lang/ruby/Manifest
index 8008c46b..9fc48c2a 100644
--- a/lang/ruby/Manifest
+++ b/lang/ruby/Manifest
@@ -9,16 +9,27 @@ lib/avro.rb
lib/avro/data_file.rb
lib/avro/io.rb
lib/avro/ipc.rb
+lib/avro/logical_types.rb
lib/avro/protocol.rb
lib/avro/schema.rb
+lib/avro/schema_compatibility.rb
+lib/avro/schema_normalization.rb
+lib/avro/schema_validator.rb
+test/case_finder.rb
test/random_data.rb
test/sample_ipc_client.rb
test/sample_ipc_http_client.rb
test/sample_ipc_http_server.rb
test/sample_ipc_server.rb
test/test_datafile.rb
+test/test_fingerprints.rb
test/test_help.rb
test/test_io.rb
+test/test_logical_types.rb
test/test_protocol.rb
+test/test_schema.rb
+test/test_schema_compatibility.rb
+test/test_schema_normalization.rb
+test/test_schema_validator.rb
test/test_socket_transport.rb
test/tool.rb
diff --git a/lang/ruby/build.sh b/lang/ruby/build.sh
index 8fea5656..a09617c3 100755
--- a/lang/ruby/build.sh
+++ b/lang/ruby/build.sh
@@ -25,11 +25,11 @@ export GEM_HOME=.gem/
export PATH="$PATH:.gem/bin"
# bootstrap bundler
-gem install --no-rdoc --no-ri bundler
+gem install --no-document -v 1.17.3 bundler
+bundle install
case "$1" in
test)
- bundle install
bundle exec rake test
;;
@@ -39,6 +39,7 @@ case "$1" in
clean)
bundle exec rake clean
+ rm -rf tmp avro.gemspec data.avr
;;
*)
diff --git a/lang/ruby/lib/avro.rb b/lang/ruby/lib/avro.rb
index c419ab1e..5a645663 100644
--- a/lang/ruby/lib/avro.rb
+++ b/lang/ruby/lib/avro.rb
@@ -32,6 +32,15 @@ module Avro
super(msg)
end
end
+
+ class << self
+ attr_writer :disable_field_default_validation
+
+ def disable_field_default_validation
+ @disable_field_default_validation ||=
+ ENV.fetch('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION', '') != ''
+ end
+ end
end
require 'avro/schema'
@@ -40,3 +49,5 @@ require 'avro/data_file'
require 'avro/protocol'
require 'avro/ipc'
require 'avro/schema_normalization'
+require 'avro/schema_validator'
+require 'avro/schema_compatibility'
diff --git a/lang/ruby/lib/avro/data_file.rb b/lang/ruby/lib/avro/data_file.rb
index c27c2dc1..e4650557 100644
--- a/lang/ruby/lib/avro/data_file.rb
+++ b/lang/ruby/lib/avro/data_file.rb
@@ -338,12 +338,29 @@ module Avro
def decompress(data)
load_snappy!
+ crc32 = data.slice(-4..-1).unpack('N').first
+ uncompressed = Snappy.inflate(data.slice(0..-5))
+
+ if crc32 == Zlib.crc32(uncompressed)
+ uncompressed
+ else
+ # older versions of avro-ruby didn't write the checksum, so if it
+ # doesn't match this must assume that it wasn't there and return
+ # the entire payload uncompressed.
+ Snappy.inflate(data)
+ end
+ rescue Snappy::Error
+ # older versions of avro-ruby didn't write the checksum, so removing
+ # the last 4 bytes may cause Snappy to fail. recover by assuming the
+ # payload is from an older file and uncompress the entire buffer.
Snappy.inflate(data)
end
def compress(data)
load_snappy!
- Snappy.deflate(data)
+ crc32 = Zlib.crc32(data)
+ compressed = Snappy.deflate(data)
+ [compressed, crc32].pack('a*N')
end
private
diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb
index 9dc41db4..31107217 100644
--- a/lang/ruby/lib/avro/io.rb
+++ b/lang/ruby/lib/avro/io.rb
@@ -5,9 +5,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -43,9 +43,9 @@ module Avro
end
def byte!
- @reader.read(1).unpack('C').first
+ @reader.readbyte
end
-
+
def read_null
# null is written as zero byte's
nil
@@ -76,7 +76,7 @@ module Avro
# The float is converted into a 32-bit integer using a method
# equivalent to Java's floatToIntBits and then encoded in
# little-endian format.
- @reader.read(4).unpack('e')[0]
+ read_and_unpack(4, 'e'.freeze)
end
def read_double
@@ -84,7 +84,7 @@ module Avro
# The double is converted into a 64-bit integer using a method
# equivalent to Java's doubleToLongBits and then encoded in
# little-endian format.
- @reader.read(8).unpack('E')[0]
+ read_and_unpack(8, 'E'.freeze)
end
def read_bytes
@@ -97,7 +97,7 @@ module Avro
# A string is encoded as a long followed by that many bytes of
# UTF-8 encoded character data.
read_bytes.tap do |string|
- string.force_encoding("UTF-8") if string.respond_to? :force_encoding
+ string.force_encoding('UTF-8'.freeze) if string.respond_to? :force_encoding
end
end
@@ -144,6 +144,23 @@ module Avro
def skip(n)
reader.seek(reader.tell() + n)
end
+
+ private
+
+ # Optimize unpacking strings when `unpack1` is available (ruby >= 2.4)
+ if String.instance_methods.include?(:unpack1)
+
+ def read_and_unpack(byte_count, format)
+ @reader.read(byte_count).unpack1(format)
+ end
+
+ else
+
+ def read_and_unpack(byte_count, format)
+ @reader.read(byte_count).unpack(format)[0]
+ end
+
+ end
end
# Write leaf values
@@ -159,7 +176,7 @@ module Avro
nil
end
- # a boolean is written as a single byte
+ # a boolean is written as a single byte
# whose value is either 0 (false) or 1 (true).
def write_boolean(datum)
on_disk = datum ? 1.chr : 0.chr
@@ -175,7 +192,6 @@ module Avro
# int and long values are written using variable-length,
# zig-zag coding.
def write_long(n)
- foo = n
n = (n << 1) ^ (n >> 63)
while (n & ~0x7F) != 0
@writer.write(((n & 0x7f) | 0x80).chr)
@@ -189,7 +205,7 @@ module Avro
# equivalent to Java's floatToIntBits and then encoded in
# little-endian format.
def write_float(datum)
- @writer.write([datum].pack('e'))
+ @writer.write([datum].pack('e'.freeze))
end
# A double is written as 8 bytes.
@@ -197,7 +213,7 @@ module Avro
# equivalent to Java's doubleToLongBits and then encoded in
# little-endian format.
def write_double(datum)
- @writer.write([datum].pack('E'))
+ @writer.write([datum].pack('E'.freeze))
end
# Bytes are encoded as a long followed by that many bytes of data.
@@ -209,7 +225,7 @@ module Avro
# A string is encoded as a long followed by that many bytes of
# UTF-8 encoded character data
def write_string(datum)
- # FIXME utf-8 encode this in 1.9
+ datum = datum.encode('utf-8'.freeze) if datum.respond_to? :encode
write_bytes(datum)
end
@@ -221,46 +237,7 @@ module Avro
class DatumReader
def self.match_schemas(writers_schema, readers_schema)
- w_type = writers_schema.type_sym
- r_type = readers_schema.type_sym
-
- # This conditional is begging for some OO love.
- if w_type == :union || r_type == :union
- return true
- end
-
- if w_type == r_type
- return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
-
- case r_type
- when :record
- return writers_schema.fullname == readers_schema.fullname
- when :error
- return writers_schema.fullname == readers_schema.fullname
- when :request
- return true
- when :fixed
- return writers_schema.fullname == readers_schema.fullname &&
- writers_schema.size == readers_schema.size
- when :enum
- return writers_schema.fullname == readers_schema.fullname
- when :map
- return writers_schema.values.type == readers_schema.values.type
- when :array
- return writers_schema.items.type == readers_schema.items.type
- end
- end
-
- # Handle schema promotion
- if w_type == :int && [:long, :float, :double].include?(r_type)
- return true
- elsif w_type == :long && [:float, :double].include?(r_type)
- return true
- elsif w_type == :float && r_type == :double
- return true
- end
-
- return false
+ Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
end
attr_accessor :writers_schema, :readers_schema
@@ -293,7 +270,7 @@ module Avro
# function dispatch for reading data based on type of writer's
# schema
- case writers_schema.type_sym
+ datum = case writers_schema.type_sym
when :null; decoder.read_null
when :boolean; decoder.read_boolean
when :string; decoder.read_string
@@ -311,6 +288,8 @@ module Avro
else
raise AvroError, "Cannot read unknown schema type: #{writers_schema.type}"
end
+
+ readers_schema.type_adapter.decode(datum)
end
def read_fixed(writers_schema, readers_schema, decoder)
@@ -336,7 +315,7 @@ module Avro
while block_count != 0
if block_count < 0
block_count = -block_count
- block_size = decoder.read_long
+ _block_size = decoder.read_long
end
block_count.times do
read_items << read_data(writers_schema.items,
@@ -355,7 +334,7 @@ module Avro
while block_count != 0
if block_count < 0
block_count = -block_count
- block_size = decoder.read_long
+ _block_size = decoder.read_long
end
block_count.times do
key = decoder.read_string
@@ -393,11 +372,11 @@ module Avro
writers_fields_hash = writers_schema.fields_hash
readers_fields_hash.each do |field_name, field|
unless writers_fields_hash.has_key? field_name
- if !field.default.nil?
+ if field.default?
field_val = read_default_value(field.type, field.default)
read_record[field.name] = field_val
else
- # FIXME(jmhodges) another 'unset' here
+ raise AvroError, "Missing data for #{field.type} with no default"
end
end
end
@@ -520,7 +499,7 @@ module Avro
if block_count < 0
decoder.skip(decoder.read_long)
else
- block_count.times &blk
+ block_count.times(&blk)
end
block_count = decoder.read_long
end
@@ -538,8 +517,10 @@ module Avro
write_data(writers_schema, datum, encoder)
end
- def write_data(writers_schema, datum, encoder)
- unless Schema.validate(writers_schema, datum)
+ def write_data(writers_schema, logical_datum, encoder)
+ datum = writers_schema.type_adapter.encode(logical_datum)
+
+ unless Schema.validate(writers_schema, datum, { recursive: false, encoded: true })
raise AvroTypeError.new(writers_schema, datum)
end
@@ -574,6 +555,7 @@ module Avro
end
def write_array(writers_schema, datum, encoder)
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Array)
if datum.size > 0
encoder.write_long(datum.size)
datum.each do |item|
@@ -584,6 +566,7 @@ module Avro
end
def write_map(writers_schema, datum, encoder)
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
if datum.size > 0
encoder.write_long(datum.size)
datum.each do |k,v|
@@ -606,6 +589,7 @@ module Avro
end
def write_record(writers_schema, datum, encoder)
+ raise AvroTypeError.new(writers_schema, datum) unless datum.is_a?(Hash)
writers_schema.fields.each do |field|
write_data(field.type, datum[field.name], encoder)
end
diff --git a/lang/ruby/lib/avro/ipc.rb b/lang/ruby/lib/avro/ipc.rb
index 1ac81294..f0816e83 100644
--- a/lang/ruby/lib/avro/ipc.rb
+++ b/lang/ruby/lib/avro/ipc.rb
@@ -5,9 +5,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -74,10 +74,10 @@ module Avro::IPC
class ConnectionClosedException < Avro::AvroError; end
+ # Base class for the client side of a protocol interaction.
class Requestor
- """Base class for the client side of a protocol interaction."""
- attr_reader :local_protocol, :transport
- attr_accessor :remote_protocol, :remote_hash, :send_protocol
+ attr_reader :local_protocol, :transport, :remote_protocol, :remote_hash
+ attr_accessor :send_protocol
def initialize(local_protocol, transport)
@local_protocol = local_protocol
@@ -100,7 +100,7 @@ module Avro::IPC
def request(message_name, request_datum)
# Writes a request message and reads a response or error message.
# build handshake and call request
- buffer_writer = StringIO.new('', 'w+')
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
write_handshake_request(buffer_encoder)
write_call_request(message_name, request_datum, buffer_encoder)
@@ -193,9 +193,9 @@ module Avro::IPC
# * a one-byte error flag boolean, followed by either:
# * if the error flag is false,
# the message response, serialized per the message's response schema.
- # * if the error flag is true,
+ # * if the error flag is true,
# the error, serialized per the message's error union schema.
- response_metadata = META_READER.read(decoder)
+ _response_metadata = META_READER.read(decoder)
# remote response schema
remote_message_schema = remote_protocol.messages[message_name]
@@ -244,7 +244,7 @@ module Avro::IPC
# a response or error. Compare to 'handle()' in Thrift.
def respond(call_request, transport=nil)
buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
- buffer_writer = StringIO.new('', 'w+')
+ buffer_writer = StringIO.new(''.force_encoding('BINARY'))
buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
error = nil
response_metadata = {}
@@ -257,7 +257,7 @@ module Avro::IPC
end
# read request using remote protocol
- request_metadata = META_READER.read(buffer_decoder)
+ _request_metadata = META_READER.read(buffer_decoder)
remote_message_name = buffer_decoder.read_string
# get remote and local request schemas so we can do
@@ -294,6 +294,7 @@ module Avro::IPC
end
rescue Avro::AvroError => e
error = AvroRemoteException.new(e.to_s)
+ # TODO does the stuff written here ever get used?
buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new)
META_WRITER.write(response_metadata, buffer_encoder)
buffer_encoder.write_boolean(true)
@@ -393,7 +394,7 @@ module Avro::IPC
def read_framed_message
message = []
loop do
- buffer = StringIO.new
+ buffer = StringIO.new(''.force_encoding('BINARY'))
buffer_length = read_buffer_length
if buffer_length == 0
return message.join
@@ -410,7 +411,7 @@ module Avro::IPC
end
def write_framed_message(message)
- message_length = message.size
+ message_length = message.bytesize
total_bytes_sent = 0
while message_length - total_bytes_sent > 0
if message_length - total_bytes_sent > BUFFER_SIZE
@@ -426,7 +427,7 @@ module Avro::IPC
end
def write_buffer(chunk)
- buffer_length = chunk.size
+ buffer_length = chunk.bytesize
write_buffer_length(buffer_length)
total_bytes_sent = 0
while total_bytes_sent < buffer_length
@@ -467,7 +468,7 @@ module Avro::IPC
end
def write_framed_message(message)
- message_size = message.size
+ message_size = message.bytesize
total_bytes_sent = 0
while message_size - total_bytes_sent > 0
if message_size - total_bytes_sent > BUFFER_SIZE
@@ -485,7 +486,7 @@ module Avro::IPC
private
def write_buffer(chunk)
- buffer_size = chunk.size
+ buffer_size = chunk.bytesize
write_buffer_size(buffer_size)
writer << chunk
end
@@ -505,13 +506,13 @@ module Avro::IPC
def read_framed_message
message = []
loop do
- buffer = ""
+ buffer = ''.force_encoding('BINARY')
buffer_size = read_buffer_size
return message.join if buffer_size == 0
- while buffer.size < buffer_size
- chunk = reader.read(buffer_size - buffer.size)
+ while buffer.bytesize < buffer_size
+ chunk = reader.read(buffer_size - buffer.bytesize)
chunk_error?(chunk)
buffer << chunk
end
@@ -541,7 +542,7 @@ module Avro::IPC
end
def transceive(message)
- writer = FramedWriter.new(StringIO.new)
+ writer = FramedWriter.new(StringIO.new(''.force_encoding('BINARY')))
writer.write_framed_message(message)
resp = @conn.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
FramedReader.new(StringIO.new(resp.body)).read_framed_message
diff --git a/lang/ruby/lib/avro/logical_types.rb b/lang/ruby/lib/avro/logical_types.rb
new file mode 100644
index 00000000..e1b219d7
--- /dev/null
+++ b/lang/ruby/lib/avro/logical_types.rb
@@ -0,0 +1,90 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'date'
+
+module Avro
+ module LogicalTypes
+ module IntDate
+ EPOCH_START = Date.new(1970, 1, 1)
+
+ def self.encode(date)
+ return date.to_i if date.is_a?(Numeric)
+
+ (date - EPOCH_START).to_i
+ end
+
+ def self.decode(int)
+ EPOCH_START + int
+ end
+ end
+
+ module TimestampMillis
+ def self.encode(value)
+ return value.to_i if value.is_a?(Numeric)
+
+ time = value.to_time
+ time.to_i * 1000 + time.usec / 1000
+ end
+
+ def self.decode(int)
+ s, ms = int / 1000, int % 1000
+ Time.at(s, ms * 1000).utc
+ end
+ end
+
+ module TimestampMicros
+ def self.encode(value)
+ return value.to_i if value.is_a?(Numeric)
+
+ time = value.to_time
+ time.to_i * 1000_000 + time.usec
+ end
+
+ def self.decode(int)
+ s, us = int / 1000_000, int % 1000_000
+ Time.at(s, us).utc
+ end
+ end
+
+ module Identity
+ def self.encode(datum)
+ datum
+ end
+
+ def self.decode(datum)
+ datum
+ end
+ end
+
+ TYPES = {
+ "int" => {
+ "date" => IntDate
+ },
+ "long" => {
+ "timestamp-millis" => TimestampMillis,
+ "timestamp-micros" => TimestampMicros
+ },
+ }.freeze
+
+ def self.type_adapter(type, logical_type)
+ return unless logical_type
+
+ TYPES.fetch(type, {}.freeze).fetch(logical_type, Identity)
+ end
+ end
+end
diff --git a/lang/ruby/lib/avro/protocol.rb b/lang/ruby/lib/avro/protocol.rb
index 6c210e19..9f509dde 100644
--- a/lang/ruby/lib/avro/protocol.rb
+++ b/lang/ruby/lib/avro/protocol.rb
@@ -20,7 +20,7 @@ module Avro
VALID_TYPE_SCHEMA_TYPES_SYM = Set.new(VALID_TYPE_SCHEMA_TYPES.map(&:to_sym))
class ProtocolParseError < Avro::AvroError; end
- attr_reader :name, :namespace, :types, :messages, :md5
+ attr_reader :name, :namespace, :types, :messages, :md5, :doc
def self.parse(protocol_string)
json_data = MultiJson.load(protocol_string)
@@ -29,13 +29,14 @@ module Avro
namespace = json_data['namespace']
types = json_data['types']
messages = json_data['messages']
- Protocol.new(name, namespace, types, messages)
+ doc = json_data['doc']
+ Protocol.new(name, namespace, types, messages, doc)
else
raise ProtocolParseError, "Not a JSON object: #{json_data}"
end
end
- def initialize(name, namespace=nil, types=nil, messages=nil)
+ def initialize(name, namespace=nil, types=nil, messages=nil, doc=nil)
# Ensure valid ctor args
if !name
raise ProtocolParseError, 'Protocols must have a non-empty name.'
@@ -55,6 +56,7 @@ module Avro
@types = parse_types(types, type_names)
@messages = parse_messages(messages, type_names)
@md5 = Digest::MD5.digest(to_s)
+ @doc = doc
end
def to_s
@@ -67,7 +69,6 @@ module Avro
private
def parse_types(types, type_names)
- type_objects = []
types.collect do |type|
# FIXME adding type.name to type_names is not defined in the
# spec. Possible bug in the python impl and the spec.
@@ -92,7 +93,8 @@ module Avro
request = body['request']
response = body['response']
errors = body['errors']
- message_objects[name] = Message.new(name, request, response, errors, names, namespace)
+ doc = body['doc']
+ message_objects[name] = Message.new(name, request, response, errors, names, namespace, doc)
end
message_objects
end
@@ -111,14 +113,15 @@ module Avro
end
class Message
- attr_reader :name, :request, :response, :errors, :default_namespace
+ attr_reader :name, :request, :response, :errors, :default_namespace, :doc
- def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil)
+ def initialize(name, request, response, errors=nil, names=nil, default_namespace=nil, doc=nil)
@name = name
@default_namespace = default_namespace
@request = parse_request(request, names)
@response = parse_response(response, names)
@errors = parse_errors(errors, names) if errors
+ @doc = doc
end
def to_avro(names=Set.new)
@@ -127,6 +130,7 @@ module Avro
'response' => response.to_avro(names)
}.tap do |hash|
hash['errors'] = errors.to_avro(names) if errors
+ hash['doc'] = @doc if @doc
end
end
diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb
index 87f6fa48..4a766fdc 100644
--- a/lang/ruby/lib/avro/schema.rb
+++ b/lang/ruby/lib/avro/schema.rb
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+require 'avro/logical_types'
+
module Avro
class Schema
# Sets of strings, for backwards compatibility. See below for sets of symbols,
@@ -40,6 +42,7 @@ module Avro
def self.real_parse(json_obj, names=nil, default_namespace=nil)
if json_obj.is_a? Hash
type = json_obj['type']
+ logical_type = json_obj['logicalType']
raise SchemaParseError, %Q(No "type" property: #{json_obj}) if type.nil?
# Check that the type is valid before calling #to_sym, since symbols are never garbage
@@ -50,7 +53,7 @@ module Avro
type_sym = type.to_sym
if PRIMITIVE_TYPES_SYM.include?(type_sym)
- return PrimitiveSchema.new(type_sym)
+ return PrimitiveSchema.new(type_sym, logical_type)
elsif NAMED_TYPES_SYM.include? type_sym
name = json_obj['name']
@@ -58,13 +61,15 @@ module Avro
case type_sym
when :fixed
size = json_obj['size']
- return FixedSchema.new(name, namespace, size, names)
+ return FixedSchema.new(name, namespace, size, names, logical_type)
when :enum
symbols = json_obj['symbols']
- return EnumSchema.new(name, namespace, symbols, names)
+ doc = json_obj['doc']
+ return EnumSchema.new(name, namespace, symbols, names, doc)
when :record, :error
fields = json_obj['fields']
- return RecordSchema.new(name, namespace, fields, names, type_sym)
+ doc = json_obj['doc']
+ return RecordSchema.new(name, namespace, fields, names, type_sym, doc)
else
raise SchemaParseError.new("Unknown named type: #{type}")
end
@@ -91,52 +96,29 @@ module Avro
end
# Determine if a ruby datum is an instance of a schema
- def self.validate(expected_schema, datum)
- case expected_schema.type_sym
- when :null
- datum.nil?
- when :boolean
- datum == true || datum == false
- when :string, :bytes
- datum.is_a? String
- when :int
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
- (INT_MIN_VALUE <= datum) && (datum <= INT_MAX_VALUE)
- when :long
- (datum.is_a?(Fixnum) || datum.is_a?(Bignum)) &&
- (LONG_MIN_VALUE <= datum) && (datum <= LONG_MAX_VALUE)
- when :float, :double
- datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
- when :fixed
- datum.is_a?(String) && datum.size == expected_schema.size
- when :enum
- expected_schema.symbols.include? datum
- when :array
- datum.is_a?(Array) &&
- datum.all?{|d| validate(expected_schema.items, d) }
- when :map
- datum.keys.all?{|k| k.is_a? String } &&
- datum.values.all?{|v| validate(expected_schema.values, v) }
- when :union
- expected_schema.schemas.any?{|s| validate(s, datum) }
- when :record, :error, :request
- datum.is_a?(Hash) &&
- expected_schema.fields.all?{|f| validate(f.type, datum[f.name]) }
- else
- raise "you suck #{expected_schema.inspect} is not allowed."
- end
+ def self.validate(expected_schema, logical_datum, options = { recursive: true, encoded: false })
+ SchemaValidator.validate!(expected_schema, logical_datum, options)
+ true
+ rescue SchemaValidator::ValidationError
+ false
end
- def initialize(type)
+ def initialize(type, logical_type=nil)
@type_sym = type.is_a?(Symbol) ? type : type.to_sym
+ @logical_type = logical_type
end
attr_reader :type_sym
+ attr_reader :logical_type
# Returns the type as a string (rather than a symbol), for backwards compatibility.
# Deprecated in favor of {#type_sym}.
def type; @type_sym.to_s; end
+ def type_adapter
+ @type_adapter ||= LogicalTypes.type_adapter(type, logical_type) || LogicalTypes::Identity
+ end
+
# Returns the MD5 fingerprint of the schema as an Integer.
def md5_fingerprint
parsing_form = SchemaNormalization.to_parsing_form(self)
@@ -149,6 +131,18 @@ module Avro
Digest::SHA256.hexdigest(parsing_form).to_i(16)
end
+ def read?(writers_schema)
+ SchemaCompatibility.can_read?(writers_schema, self)
+ end
+
+ def be_read?(other_schema)
+ other_schema.read?(self)
+ end
+
+ def mutual_read?(other_schema)
+ SchemaCompatibility.mutual_read?(other_schema, self)
+ end
+
def ==(other, seen=nil)
other.is_a?(Schema) && type_sym == other.type_sym
end
@@ -172,7 +166,9 @@ module Avro
end
def to_avro(names=nil)
- {'type' => type}
+ props = {'type' => type}
+ props['logicalType'] = logical_type if logical_type
+ props
end
def to_s
@@ -181,9 +177,11 @@ module Avro
class NamedSchema < Schema
attr_reader :name, :namespace
- def initialize(type, name, namespace=nil, names=nil)
- super(type)
+
+ def initialize(type, name, namespace=nil, names=nil, doc=nil, logical_type=nil)
+ super(type, logical_type)
@name, @namespace = Name.extract_namespace(name, namespace)
+ @doc = doc
names = Name.add_name(names, self)
end
@@ -194,6 +192,7 @@ module Avro
end
props = {'name' => @name}
props.merge!('namespace' => @namespace) if @namespace
+ props.merge!('doc' => @doc) if @doc
super.merge props
end
@@ -203,7 +202,7 @@ module Avro
end
class RecordSchema < NamedSchema
- attr_reader :fields
+ attr_reader :fields, :doc
def self.make_field_objects(field_data, names, namespace=nil)
field_objects, field_names = [], Set.new
@@ -211,9 +210,10 @@ module Avro
if field.respond_to?(:[]) # TODO(jmhodges) wtffffff
type = field['type']
name = field['name']
- default = field['default']
+ default = field.key?('default') ? field['default'] : :no_default
order = field['order']
- new_field = Field.new(type, name, default, order, names, namespace)
+ doc = field['doc']
+ new_field = Field.new(type, name, default, order, names, namespace, doc)
# make sure field name has not been used yet
if field_names.include?(new_field.name)
raise SchemaParseError, "Field name #{new_field.name.inspect} is already in use"
@@ -227,14 +227,20 @@ module Avro
field_objects
end
- def initialize(name, namespace, fields, names=nil, schema_type=:record)
+ def initialize(name, namespace, fields, names=nil, schema_type=:record, doc=nil)
if schema_type == :request || schema_type == 'request'
@type_sym = schema_type.to_sym
@namespace = namespace
+ @name = nil
+ @doc = nil
else
- super(schema_type, name, namespace, names)
+ super(schema_type, name, namespace, names, doc)
end
- @fields = RecordSchema.make_field_objects(fields, names, self.namespace)
+ @fields = if fields
+ RecordSchema.make_field_objects(fields, names, self.namespace)
+ else
+ {}
+ end
end
def fields_hash
@@ -285,8 +291,7 @@ module Avro
def initialize(schemas, names=nil, default_namespace=nil)
super(:union)
- schema_objects = []
- schemas.each_with_index do |schema, i|
+ @schemas = schemas.each_with_object([]) do |schema, schema_objects|
new_schema = subparse(schema, names, default_namespace)
ns_type = new_schema.type_sym
@@ -299,7 +304,6 @@ module Avro
else
schema_objects << new_schema
end
- @schemas = schema_objects
end
end
@@ -309,13 +313,14 @@ module Avro
end
class EnumSchema < NamedSchema
- attr_reader :symbols
- def initialize(name, space, symbols, names=nil)
+ attr_reader :symbols, :doc
+
+ def initialize(name, space, symbols, names=nil, doc=nil)
if symbols.uniq.length < symbols.length
- fail_msg = 'Duplicate symbol: %s' % symbols
+ fail_msg = "Duplicate symbol: #{symbols}"
raise Avro::SchemaParseError, fail_msg
end
- super(:enum, name, space, names)
+ super(:enum, name, space, names, doc)
@symbols = symbols
end
@@ -327,11 +332,11 @@ module Avro
# Valid primitive types are in PRIMITIVE_TYPES.
class PrimitiveSchema < Schema
- def initialize(type)
+ def initialize(type, logical_type=nil)
if PRIMITIVE_TYPES_SYM.include?(type)
- super(type)
+ super(type, logical_type)
elsif PRIMITIVE_TYPES.include?(type)
- super(type.to_sym)
+ super(type.to_sym, logical_type)
else
raise AvroError.new("#{type} is not a valid primitive type.")
end
@@ -345,12 +350,12 @@ module Avro
class FixedSchema < NamedSchema
attr_reader :size
- def initialize(name, space, size, names=nil)
+ def initialize(name, space, size, names=nil, logical_type=nil)
# Ensure valid cto args
- unless size.is_a?(Fixnum) || size.is_a?(Bignum)
+ unless size.is_a?(Integer)
raise AvroError, 'Fixed Schema requires a valid integer for size property.'
end
- super(:fixed, name, space, names)
+ super(:fixed, name, space, names, nil, logical_type)
@size = size
end
@@ -361,21 +366,42 @@ module Avro
end
class Field < Schema
- attr_reader :type, :name, :default, :order
+ attr_reader :type, :name, :default, :order, :doc
- def initialize(type, name, default=nil, order=nil, names=nil, namespace=nil)
+ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace=nil, doc=nil)
@type = subparse(type, names, namespace)
@name = name
@default = default
@order = order
+ @doc = doc
+ validate_default! if default? && !Avro.disable_field_default_validation
+ end
+
+ def default?
+ @default != :no_default
end
def to_avro(names=Set.new)
{'name' => name, 'type' => type.to_avro(names)}.tap do |avro|
- avro['default'] = default if default
+ avro['default'] = default if default?
avro['order'] = order if order
+ avro['doc'] = doc if doc
end
end
+
+ private
+
+ def validate_default!
+ type_for_default = if type.type_sym == :union
+ type.schemas.first
+ else
+ type
+ end
+
+ Avro::SchemaValidator.validate!(type_for_default, default)
+ rescue Avro::SchemaValidator::ValidationError => e
+ raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}"
+ end
end
end
diff --git a/lang/ruby/lib/avro/schema_compatibility.rb b/lang/ruby/lib/avro/schema_compatibility.rb
new file mode 100644
index 00000000..37401fc0
--- /dev/null
+++ b/lang/ruby/lib/avro/schema_compatibility.rb
@@ -0,0 +1,170 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+module Avro
+ module SchemaCompatibility
+ # Perform a full, recursive check that a datum written using the writers_schema
+ # can be read using the readers_schema.
+ def self.can_read?(writers_schema, readers_schema)
+ Checker.new.can_read?(writers_schema, readers_schema)
+ end
+
+ # Perform a full, recursive check that a datum written using either the
+ # writers_schema or the readers_schema can be read using the other schema.
+ def self.mutual_read?(writers_schema, readers_schema)
+ Checker.new.mutual_read?(writers_schema, readers_schema)
+ end
+
+ # Perform a basic check that a datum written with the writers_schema could
+ # be read using the readers_schema. This check only includes matching the types,
+ # including schema promotion, and matching the full name for named types.
+ # Aliases for named types are not supported here, and the ruby implementation
+ # of Avro in general does not include support for aliases.
+ def self.match_schemas(writers_schema, readers_schema)
+   # Returns true when the two schemas are of matching or promotable types,
+   # false otherwise. Record fields are not compared here.
+
+   w_type = writers_schema.type_sym
+   r_type = readers_schema.type_sym
+
+   # A union on either side always passes this basic check; its member
+   # schemas are not inspected here.
+   return true if w_type == :union || r_type == :union
+
+   # Same type on both sides: primitives match outright, other types below.
+   if w_type == r_type
+     return true if Schema::PRIMITIVE_TYPES_SYM.include?(r_type)
+
+     case r_type
+     when :record, :error, :enum
+       # Named types match on their full name only.
+       return writers_schema.fullname == readers_schema.fullname
+     when :request
+       # Requests always match.
+       return true
+     when :fixed
+       # Fixed types additionally need the same size.
+       same_name = writers_schema.fullname == readers_schema.fullname
+       return same_name && writers_schema.size == readers_schema.size
+     when :map
+       # Containers match when their element schemas match.
+       return match_schemas(writers_schema.values, readers_schema.values)
+     when :array
+       return match_schemas(writers_schema.items, readers_schema.items)
+     end
+   end
+
+   # Handle schema promotion. Each key is a writer type and its value lists
+   # the reader types that data written with the key type may be read as.
+   promotions = {
+     int: [:long, :float, :double],
+     long: [:float, :double],
+     float: [:double],
+     string: [:bytes],
+     bytes: [:string]
+   }
+
+   promotions.fetch(w_type, []).include?(r_type)
+ end
+
+ class Checker
+ SIMPLE_CHECKS = Schema::PRIMITIVE_TYPES_SYM.dup.add(:fixed).freeze
+
+ attr_reader :recursion_set
+ private :recursion_set
+
+ def initialize
+ @recursion_set = Set.new
+ end
+
+ def can_read?(writers_schema, readers_schema)
+ full_match_schemas(writers_schema, readers_schema)
+ end
+
+ def mutual_read?(writers_schema, readers_schema)
+ can_read?(writers_schema, readers_schema) && can_read?(readers_schema, writers_schema)
+ end
+
+ private
+
+ # Recursively decides whether data written with writers_schema can be read
+ # with readers_schema. A pair this checker has already visited counts as a
+ # match, so schemas that refer to themselves are not descended into twice.
+ def full_match_schemas(writers_schema, readers_schema)
+   return true if recursion_in_progress?(writers_schema, readers_schema)
+
+   return false unless Avro::SchemaCompatibility.match_schemas(writers_schema, readers_schema)
+
+   writer_is_union = writers_schema.type_sym == :union
+   reader_type = readers_schema.type_sym
+   return true if !writer_is_union && SIMPLE_CHECKS.include?(reader_type)
+
+   # NOTE(review): the map/array/enum branches call values/items/symbols on
+   # the writer even when it is a union - confirm unions respond to them.
+   case reader_type
+   when :record then match_record_schemas(writers_schema, readers_schema)
+   when :map then full_match_schemas(writers_schema.values, readers_schema.values)
+   when :array then full_match_schemas(writers_schema.items, readers_schema.items)
+   when :union then match_union_schemas(writers_schema, readers_schema)
+   when :enum
+     # reader's symbols must contain all writer's symbols
+     (writers_schema.symbols - readers_schema.symbols).empty?
+   else
+     # No dedicated branch for this reader type: a single-member union
+     # writer is unwrapped and compared, anything else does not match.
+     single_member = writer_is_union && writers_schema.schemas.size == 1
+     single_member ? full_match_schemas(writers_schema.schemas.first, readers_schema) : false
+   end
+ end
+
+ # Matches against a union reader: every member of a union writer must be
+ # readable by the reader, while a non-union writer needs one reader member
+ # that can read it. Raises RuntimeError unless readers_schema is a union.
+ def match_union_schemas(writers_schema, readers_schema)
+   raise 'readers_schema must be a union' unless readers_schema.type_sym == :union
+   unless writers_schema.type_sym == :union
+     return readers_schema.schemas.any? { |candidate| full_match_schemas(writers_schema, candidate) }
+   end
+   writers_schema.schemas.all? { |member| full_match_schemas(member, readers_schema) }
+ end
+
+ # Checks every reader field: one the writer also has must have a compatible
+ # type, one the writer lacks must have a default. A union writer never matches.
+ def match_record_schemas(writers_schema, readers_schema)
+   return false if writers_schema.type_sym == :union
+
+   written = writers_schema.fields_hash
+   readers_schema.fields.all? do |reader_field|
+     if written.key?(reader_field.name)
+       full_match_schemas(written[reader_field.name].type, reader_field.type)
+     else
+       reader_field.default?
+     end
+   end
+ end
+
+ # Records the (writer, reader) pair being compared and reports whether this
+ # checker had already recorded it.
+ #
+ # Returns true when the pair was seen before, false when it was newly added.
+ def recursion_in_progress?(writers_schema, readers_schema)
+   pair = [writers_schema.object_id, readers_schema.object_id]
+
+   # Set#add? returns nil when the element is already a member.
+   recursion_set.add?(pair).nil?
+ end
+ end
+ end
+end
diff --git a/lang/ruby/lib/avro/schema_normalization.rb b/lang/ruby/lib/avro/schema_normalization.rb
index 0a5bee5d..1506fcac 100644
--- a/lang/ruby/lib/avro/schema_normalization.rb
+++ b/lang/ruby/lib/avro/schema_normalization.rb
@@ -25,7 +25,7 @@ module Avro
end
def to_parsing_form(schema)
- JSON.dump(normalize_schema(schema))
+ MultiJson.dump(normalize_schema(schema))
end
private
diff --git a/lang/ruby/lib/avro/schema_validator.rb b/lang/ruby/lib/avro/schema_validator.rb
new file mode 100644
index 00000000..f9274f06
--- /dev/null
+++ b/lang/ruby/lib/avro/schema_validator.rb
@@ -0,0 +1,242 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+module Avro
+ class SchemaValidator
+ ROOT_IDENTIFIER = '.'.freeze
+ PATH_SEPARATOR = '.'.freeze
+ INT_RANGE = Schema::INT_MIN_VALUE..Schema::INT_MAX_VALUE
+ LONG_RANGE = Schema::LONG_MIN_VALUE..Schema::LONG_MAX_VALUE
+ COMPLEX_TYPES = [:array, :error, :map, :record, :request].freeze
+ BOOLEAN_VALUES = [true, false].freeze
+
+ class Result
+ attr_reader :errors
+
+ def initialize
+ @errors = []
+ end
+
+ def <<(error)
+ @errors << error
+ end
+
+ def add_error(path, message)
+ self << "at #{path} #{message}"
+ end
+
+ def failure?
+ @errors.any?
+ end
+
+ def to_s
+ errors.join("\n")
+ end
+ end
+
+ class ValidationError < StandardError
+ attr_reader :result
+
+ def initialize(result = Result.new)
+ @result = result
+ super
+ end
+
+ def to_s
+ result.to_s
+ end
+ end
+
+ TypeMismatchError = Class.new(ValidationError)
+
+ class << self
+ def validate!(expected_schema, logical_datum, options = { recursive: true, encoded: false, fail_on_extra_fields: false })
+   # Raises ValidationError when the datum does not conform; otherwise returns the Result.
+   # Merge into a fresh hash so a caller's (possibly frozen) options are never mutated.
+   options = { recursive: true }.merge(options || {})
+   result = Result.new
+   if options[:recursive]
+     validate_recursive(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
+   else
+     validate_simple(expected_schema, logical_datum, ROOT_IDENTIFIER, result, options)
+   end
+   fail ValidationError, result if result.failure?
+   result
+ end
+
+ private
+
+ def validate_recursive(expected_schema, logical_datum, path, result, options = {})
+ datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
+
+ validate_simple(expected_schema, datum, path, result, encoded: true)
+
+ case expected_schema.type_sym
+ when :array
+ validate_array(expected_schema, datum, path, result, options)
+ when :map
+ validate_map(expected_schema, datum, path, result, options)
+ when :union
+ validate_union(expected_schema, datum, path, result, options)
+ when :record, :error, :request
+ fail TypeMismatchError unless datum.is_a?(Hash)
+ expected_schema.fields.each do |field|
+ deeper_path = deeper_path_for_hash(field.name, path)
+ validate_recursive(field.type, datum[field.name], deeper_path, result, options)
+ end
+ if options[:fail_on_extra_fields]
+ datum_fields = datum.keys.map(&:to_s)
+ schema_fields = expected_schema.fields.map(&:name)
+ (datum_fields - schema_fields).each do |extra_field|
+ result.add_error(path, "extra field '#{extra_field}' - not in schema")
+ end
+ end
+ end
+ rescue TypeMismatchError
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
+ end
+
+ def validate_simple(expected_schema, logical_datum, path, result, options = {})
+ datum = resolve_datum(expected_schema, logical_datum, options[:encoded])
+ validate_type(expected_schema)
+
+ case expected_schema.type_sym
+ when :null
+ fail TypeMismatchError unless datum.nil?
+ when :boolean
+ fail TypeMismatchError unless BOOLEAN_VALUES.include?(datum)
+ when :string, :bytes
+ fail TypeMismatchError unless datum.is_a?(String)
+ when :int
+ fail TypeMismatchError unless datum.is_a?(Integer)
+ result.add_error(path, "out of bound value #{datum}") unless INT_RANGE.cover?(datum)
+ when :long
+ fail TypeMismatchError unless datum.is_a?(Integer)
+ result.add_error(path, "out of bound value #{datum}") unless LONG_RANGE.cover?(datum)
+ when :float, :double
+ fail TypeMismatchError unless datum.is_a?(Float) || datum.is_a?(Integer)
+ when :fixed
+ if datum.is_a? String
+ result.add_error(path, fixed_string_message(expected_schema.size, datum)) unless datum.bytesize == expected_schema.size
+ else
+ result.add_error(path, "expected fixed with size #{expected_schema.size}, got #{actual_value_message(datum)}")
+ end
+ when :enum
+ result.add_error(path, enum_message(expected_schema.symbols, datum)) unless expected_schema.symbols.include?(datum)
+ end
+ rescue TypeMismatchError
+ result.add_error(path, "expected type #{expected_schema.type_sym}, got #{actual_value_message(datum)}")
+ end
+
+ def resolve_datum(expected_schema, logical_datum, encoded)
+   return logical_datum if encoded
+   # Encode through the schema's type adapter; any StandardError yields nil.
+   expected_schema.type_adapter.encode(logical_datum)
+ rescue StandardError
+   nil
+ end
+
+ def validate_type(expected_schema)
+ unless Avro::Schema::VALID_TYPES_SYM.include?(expected_schema.type_sym)
+ fail "Unexpected schema type #{expected_schema.type_sym} #{expected_schema.inspect}"
+ end
+ end
+
+ def fixed_string_message(size, datum)
+ "expected fixed with size #{size}, got \"#{datum}\" with size #{datum.bytesize}"
+ end
+
+ def enum_message(symbols, datum)
+ "expected enum with values #{symbols}, got #{actual_value_message(datum)}"
+ end
+
+ def validate_array(expected_schema, datum, path, result, options = {})
+ fail TypeMismatchError unless datum.is_a?(Array)
+ datum.each_with_index do |d, i|
+ validate_recursive(expected_schema.items, d, path + "[#{i}]", result, options)
+ end
+ end
+
+ def validate_map(expected_schema, datum, path, result, options = {})
+ fail TypeMismatchError unless datum.is_a?(Hash)
+ datum.keys.each do |k|
+ result.add_error(path, "unexpected key type '#{ruby_to_avro_type(k.class)}' in map") unless k.is_a?(String)
+ end
+ datum.each do |k, v|
+ deeper_path = deeper_path_for_hash(k, path)
+ validate_recursive(expected_schema.values, v, deeper_path, result, options)
+ end
+ end
+
+ # Accepts the datum if any union member does; otherwise reports why none did.
+ def validate_union(expected_schema, datum, path, result, options = {})
+   members = expected_schema.schemas
+   if members.size == 1
+     validate_recursive(members.first, datum, path, result, options)
+     return
+   end
+   failures = []
+   return unless first_compatible_type(datum, expected_schema, path, failures, options).nil?
+   complex_failure = failures.find { |failure| COMPLEX_TYPES.include?(failure[:type]) }
+   if complex_failure
+     complex_failure[:result].errors.each { |message| result << message }
+   else
+     type_list = members.map { |member| "'#{member.type_sym}'" }.join(', ')
+     result.add_error(path, "expected union of [#{type_list}], got #{actual_value_message(datum)}")
+   end
+ end
+
+ def first_compatible_type(datum, expected_schema, path, failures, options = {})
+ expected_schema.schemas.find do |schema|
+ result = Result.new
+ validate_recursive(schema, datum, path, result, options)
+ failures << { type: schema.type_sym, result: result } if result.failure?
+ !result.failure?
+ end
+ end
+
+ def deeper_path_for_hash(sub_key, path)
+ "#{path}#{PATH_SEPARATOR}#{sub_key}".squeeze(PATH_SEPARATOR)
+ end
+
+ # Describes a datum for error messages: its type description, followed
+ # by the inspected value unless the datum is nil.
+ def actual_value_message(value)
+   return ruby_to_avro_type(value.class) if value.nil?
+
+   type_name = case value
+               when Integer then ruby_integer_to_avro_type(value)
+               else ruby_to_avro_type(value.class)
+               end
+
+   "#{type_name} with value #{value.inspect}"
+ end
+
+ def ruby_to_avro_type(ruby_class)
+ {
+ NilClass => 'null',
+ String => 'string',
+ Float => 'float',
+ Hash => 'record'
+ }.fetch(ruby_class, ruby_class)
+ end
+
+ def ruby_integer_to_avro_type(value)
+ INT_RANGE.cover?(value) ? 'int' : 'long'
+ end
+ end
+ end
+end
diff --git a/lang/ruby/test/case_finder.rb b/lang/ruby/test/case_finder.rb
index de2d9c49..a666f338 100644
--- a/lang/ruby/test/case_finder.rb
+++ b/lang/ruby/test/case_finder.rb
@@ -16,6 +16,8 @@
# specific language governing permissions and limitations
# under the License.
#
+require 'strscan'
+
class CaseFinder
PATH = File.expand_path("../../../../share/test/data/schema-tests.txt", __FILE__)
diff --git a/lang/ruby/test/random_data.rb b/lang/ruby/test/random_data.rb
index 9d276f7d..54fa8781 100644
--- a/lang/ruby/test/random_data.rb
+++ b/lang/ruby/test/random_data.rb
@@ -27,15 +27,17 @@ class RandomData
end
def nextdata(schm, d=0)
+ return logical_nextdata(schm, d=0) unless schm.type_adapter.eql?(Avro::LogicalTypes::Identity)
+
case schm.type_sym
when :boolean
rand > 0.5
when :string
randstr()
when :int
- rand(Avro::Schema::INT_MAX_VALUE - Avro::Schema::INT_MIN_VALUE) + Avro::Schema::INT_MIN_VALUE
+ rand_int
when :long
- rand(Avro::Schema::LONG_MAX_VALUE - Avro::Schema::LONG_MIN_VALUE) + Avro::Schema::LONG_MIN_VALUE
+ rand_long
when :float
(-1024 + 2048 * rand).round.to_f
when :double
@@ -79,6 +81,15 @@ class RandomData
end
end
+ def logical_nextdata(schm, _d=0)
+   # Decode a random raw value with the adapter for the schema's logical type (nil if unknown).
+   case schm.logical_type
+   when 'date' then Avro::LogicalTypes::IntDate.decode(rand_int)
+   when 'timestamp-millis' then Avro::LogicalTypes::TimestampMillis.decode(rand_long)
+   when 'timestamp-micros' then Avro::LogicalTypes::TimestampMicros.decode(rand_long)
+   end
+ end
+
CHARPOOL = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
BYTEPOOL = '12345abcd'
@@ -87,4 +98,12 @@ class RandomData
rand(length+1).times { str << chars[rand(chars.size)] }
str
end
+
+ def rand_int
+   rand(Avro::Schema::INT_MIN_VALUE..Avro::Schema::INT_MAX_VALUE) # inclusive range, so the max is reachable
+ end
+
+ def rand_long
+   rand(Avro::Schema::LONG_MIN_VALUE..Avro::Schema::LONG_MAX_VALUE) # inclusive range, so the max is reachable
+ end
end
diff --git a/lang/ruby/test/test_datafile.rb b/lang/ruby/test/test_datafile.rb
index 8fe05d70..0f13df35 100644
--- a/lang/ruby/test/test_datafile.rb
+++ b/lang/ruby/test/test_datafile.rb
@@ -20,13 +20,13 @@ require 'test_help'
class TestDataFile < Test::Unit::TestCase
HERE = File.expand_path File.dirname(__FILE__)
def setup
- if File.exists?(HERE + '/data.avr')
+ if File.exist?(HERE + '/data.avr')
File.unlink(HERE + '/data.avr')
end
end
def teardown
- if File.exists?(HERE + '/data.avr')
+ if File.exist?(HERE + '/data.avr')
File.unlink(HERE + '/data.avr')
end
end
@@ -38,7 +38,7 @@ class TestDataFile < Test::Unit::TestCase
"fields" : [
{"name": "username", "type": "string"},
{"name": "age", "type": "int"},
- {"name": "verified", "type": "boolean", "default": "false"}
+ {"name": "verified", "type": "boolean", "default": false}
]}
JSON
diff --git a/lang/ruby/test/test_io.rb b/lang/ruby/test/test_io.rb
index f8f3da13..350d48bc 100644
--- a/lang/ruby/test/test_io.rb
+++ b/lang/ruby/test/test_io.rb
@@ -84,6 +84,17 @@ EOS
check_default(record_schema, '{"f": 11}', {"f" => 11})
end
+ def test_record_with_logical_type
+ record_schema = <<EOS
+ {"type": "record",
+ "name": "Test",
+ "fields": [{"name": "ts",
+ "type": {"type": "long",
+ "logicalType": "timestamp-micros"}}]}
+EOS
+ check(record_schema)
+ end
+
def test_error
error_schema = <<EOS
{"type": "error",
@@ -115,6 +126,7 @@ EOS
def test_union
union_schema = <<EOS
["string",
+ {"type": "int", "logicalType": "date"},
"null",
"long",
{"type": "record",
@@ -146,10 +158,31 @@ EOS
check_default(fixed_schema, '"a"', "a")
end
+ def test_record_with_nil
+ schema = Avro::Schema.parse('{"type":"record", "name":"rec", "fields":[{"type":"int", "name":"i"}]}')
+ assert_raise(Avro::IO::AvroTypeError) do
+ write_datum(nil, schema)
+ end
+ end
+
+ def test_array_with_nil
+ schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
+ assert_raise(Avro::IO::AvroTypeError) do
+ write_datum(nil, schema)
+ end
+ end
+
+ def test_map_with_nil
+ schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
+ assert_raise(Avro::IO::AvroTypeError) do
+ write_datum(nil, schema)
+ end
+ end
+
def test_enum_with_duplicate
str = '{"type": "enum", "name": "Test","symbols" : ["AA", "AA"]}'
- assert_raises(Avro::SchemaParseError) do
- schema = Avro::Schema.parse str
+ assert_raises(Avro::SchemaParseError.new('Duplicate symbol: ["AA", "AA"]')) do
+ Avro::Schema.parse str
end
end
@@ -210,8 +243,53 @@ EOS
end
end
+ def test_utf8_string_encoding
+ [
+ "\xC3".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
+ def test_bytes_encoding
+ [
+ "\xC3\x83".force_encoding('BINARY'),
+ "\xC3\x83".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
+ def test_fixed_encoding
+ [
+ "\xC3\x83".force_encoding('BINARY'),
+ "\xC3\x83".force_encoding('ISO-8859-1'),
+ "\xC3\x83".force_encoding('UTF-8')
+ ].each do |value|
+ output = ''.force_encoding('BINARY')
+ encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+ schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
+ datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
+ datum_writer.write(value, encoder)
+
+ assert_equal "\xc3\x83".force_encoding('BINARY'), output
+ end
+ end
+
def test_skip_long
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
value_to_read = 6253
# write some data in binary to string buffer
@@ -236,7 +314,7 @@ EOS
end
def test_skip_int
- for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
+ for value_to_skip, _hex_encoding in BINARY_INT_ENCODINGS
value_to_read = 6253
writer = StringIO.new
@@ -286,7 +364,7 @@ EOS
datum_to_write = 219
for rs in promotable_schemas[(i + 1)..-1]
readers_schema = Avro::Schema.parse(rs)
- writer, enc, dw = write_datum(datum_to_write, writers_schema)
+ writer, _enc, _dw = write_datum(datum_to_write, writers_schema)
datum_read = read_datum(writer, writers_schema, readers_schema)
if datum_read != datum_to_write
incorrect += 1
@@ -295,8 +373,71 @@ EOS
assert_equal(incorrect, 0)
end
end
+
+ def test_interchangeable_schemas
+ interchangeable_schemas = ['"string"', '"bytes"']
+ incorrect = 0
+ interchangeable_schemas.each_with_index do |ws, i|
+ writers_schema = Avro::Schema.parse(ws)
+ datum_to_write = 'foo'
+ readers_schema = Avro::Schema.parse(interchangeable_schemas[i == 0 ? 1 : 0])
+ writer, * = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ if datum_read != datum_to_write
+ incorrect += 1
+ end
+ end
+ assert_equal(incorrect, 0)
+ end
+
+ def test_array_schema_promotion
+ writers_schema = Avro::Schema.parse('{"type":"array", "items":"int"}')
+ readers_schema = Avro::Schema.parse('{"type":"array", "items":"long"}')
+ datum_to_write = [1, 2]
+ writer, * = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ assert_equal(datum_read, datum_to_write)
+ end
+
+ def test_map_schema_promotion
+ writers_schema = Avro::Schema.parse('{"type":"map", "values":"int"}')
+ readers_schema = Avro::Schema.parse('{"type":"map", "values":"long"}')
+ datum_to_write = { 'foo' => 1, 'bar' => 2 }
+ writer, * = write_datum(datum_to_write, writers_schema)
+ datum_read = read_datum(writer, writers_schema, readers_schema)
+ assert_equal(datum_read, datum_to_write)
+ end
+
+ def test_snappy_backward_compat
+ # a snappy-compressed block payload without the checksum
+ # this has no back-references, just one literal so the last 9
+ # bytes are the uncompressed payload.
+ old_snappy_bytes = "\x09\x20\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
+ uncompressed_bytes = "\x02\x06\x02\x0a\x67\x72\x65\x65\x6e"
+ snappy = Avro::DataFile::SnappyCodec.new
+ assert_equal(uncompressed_bytes, snappy.decompress(old_snappy_bytes))
+ end
+
private
+ def check_no_default(schema_json)
+ actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
+ actual = Avro::Schema.parse(actual_schema)
+
+ expected_schema = <<EOS
+ {"type": "record",
+ "name": "Foo",
+ "fields": [{"name": "f", "type": #{schema_json}}]}
+EOS
+ expected = Avro::Schema.parse(expected_schema)
+
+ reader = Avro::IO::DatumReader.new(actual, expected)
+ assert_raise Avro::AvroError do
+ value = reader.read(Avro::IO::BinaryDecoder.new(StringIO.new))
+ assert_not_equal(value, :no_default) # should never return this
+ end
+ end
+
def check_default(schema_json, default_json, default_value)
actual_schema = '{"type": "record", "name": "Foo", "fields": []}'
actual = Avro::Schema.parse(actual_schema)
@@ -336,11 +477,14 @@ EOS
# test writing of data to file
check_datafile(schema)
+
+ # check that AvroError is raised when there is no default
+ check_no_default(str)
end
def checkser(schm, randomdata)
datum = randomdata.next
- assert validate(schm, datum)
+ assert validate(schm, datum), 'datum is not valid for schema'
w = Avro::IO::DatumWriter.new(schm)
writer = StringIO.new "", "w"
w.write(datum, Avro::IO::BinaryEncoder.new(writer))
diff --git a/lang/ruby/test/test_logical_types.rb b/lang/ruby/test/test_logical_types.rb
new file mode 100644
index 00000000..8cd6819b
--- /dev/null
+++ b/lang/ruby/test/test_logical_types.rb
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+class TestLogicalTypes < Test::Unit::TestCase
+ def test_int_date
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "int", "logicalType": "date" }
+ SCHEMA
+
+ assert_equal 'date', schema.logical_type
+ today = Date.today
+ assert_encode_and_decode today, schema
+ assert_preencoded Avro::LogicalTypes::IntDate.encode(today), schema, today
+ end
+
+ def test_int_date_conversion
+ type = Avro::LogicalTypes::IntDate
+
+ assert_equal 5, type.encode(Date.new(1970, 1, 6))
+ assert_equal 0, type.encode(Date.new(1970, 1, 1))
+ assert_equal(-5, type.encode(Date.new(1969, 12, 27)))
+
+ assert_equal Date.new(1970, 1, 6), type.decode(5)
+ assert_equal Date.new(1970, 1, 1), type.decode(0)
+ assert_equal Date.new(1969, 12, 27), type.decode(-5)
+ end
+
+ def test_timestamp_millis_long
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "long", "logicalType": "timestamp-millis" }
+ SCHEMA
+
+ # The Time.at format is (seconds, microseconds) since Epoch.
+ time = Time.at(628232400, 12000)
+
+ assert_equal 'timestamp-millis', schema.logical_type
+ assert_encode_and_decode time, schema
+ assert_preencoded Avro::LogicalTypes::TimestampMillis.encode(time), schema, time.utc
+ end
+
+ def test_timestamp_millis_long_conversion
+ type = Avro::LogicalTypes::TimestampMillis
+
+ now = Time.now.utc
+ now_millis = Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec / 1000 * 1000)
+
+ assert_equal now_millis, type.decode(type.encode(now_millis))
+ assert_equal 1432849613221, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221000))
+ assert_equal 1432849613221, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221))
+ assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221000), type.decode(1432849613221)
+ end
+
+ def test_timestamp_micros_long
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "long", "logicalType": "timestamp-micros" }
+ SCHEMA
+
+ # The Time.at format is (seconds, microseconds) since Epoch.
+ time = Time.at(628232400, 12345)
+
+ assert_equal 'timestamp-micros', schema.logical_type
+ assert_encode_and_decode time, schema
+ assert_preencoded Avro::LogicalTypes::TimestampMicros.encode(time), schema, time.utc
+ end
+
+ def test_timestamp_micros_long_conversion
+ type = Avro::LogicalTypes::TimestampMicros
+
+ now = Time.now.utc
+
+ assert_equal Time.utc(now.year, now.month, now.day, now.hour, now.min, now.sec, now.usec), type.decode(type.encode(now))
+ assert_equal 1432849613221843, type.encode(Time.utc(2015, 5, 28, 21, 46, 53, 221843))
+ assert_equal 1432849613221843, type.encode(DateTime.new(2015, 5, 28, 21, 46, 53.221843))
+ assert_equal Time.utc(2015, 5, 28, 21, 46, 53, 221843), type.decode(1432849613221843)
+ end
+
+ def test_parse_fixed_duration
+ schema = Avro::Schema.parse <<-SCHEMA
+ { "type": "fixed", "size": 12, "name": "fixed_dur", "logicalType": "duration" }
+ SCHEMA
+
+ assert_equal 'duration', schema.logical_type
+ end
+
+ def encode(datum, schema)
+ buffer = StringIO.new("")
+ encoder = Avro::IO::BinaryEncoder.new(buffer)
+
+ datum_writer = Avro::IO::DatumWriter.new(schema)
+ datum_writer.write(datum, encoder)
+
+ buffer.string
+ end
+
+ def decode(encoded, schema)
+ buffer = StringIO.new(encoded)
+ decoder = Avro::IO::BinaryDecoder.new(buffer)
+
+ datum_reader = Avro::IO::DatumReader.new(schema, schema)
+ datum_reader.read(decoder)
+ end
+
+ def assert_encode_and_decode(datum, schema)
+ encoded = encode(datum, schema)
+ assert_equal datum, decode(encoded, schema)
+ end
+
+ def assert_preencoded(datum, schema, decoded)
+ encoded = encode(datum, schema)
+ assert_equal decoded, decode(encoded, schema)
+ end
+end
diff --git a/lang/ruby/test/test_protocol.rb b/lang/ruby/test/test_protocol.rb
index fda38824..1b036fe5 100644
--- a/lang/ruby/test/test_protocol.rb
+++ b/lang/ruby/test/test_protocol.rb
@@ -5,9 +5,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -139,7 +139,7 @@ EOS
}
EOS
- ExampleProtocol.new(<<-EOS, true)
+ ExampleProtocol.new(<<-EOS, true),
{"namespace": "org.apache.avro.test",
"protocol": "BulkData",
@@ -160,6 +160,29 @@ EOS
}
}
+EOS
+ ExampleProtocol.new(<<-EOS, true),
+{
+ "namespace": "com.acme",
+ "protocol": "HelloWorld",
+ "doc": "protocol_documentation",
+
+ "types": [
+ {"name": "Greeting", "type": "record", "fields": [
+ {"name": "message", "type": "string"}]},
+ {"name": "Curse", "type": "error", "fields": [
+ {"name": "message", "type": "string"}]}
+ ],
+
+ "messages": {
+ "hello": {
+ "doc": "message_documentation",
+ "request": [{"name": "greeting", "type": "Greeting" }],
+ "response": "Greeting",
+ "errors": ["Curse"]
+ }
+ }
+}
EOS
]
@@ -196,4 +219,14 @@ EOS
assert_equal type.namespace, 'com.acme'
end
end
+
+ def test_protocol_doc_attribute
+ original = Protocol.parse(EXAMPLES.last.protocol_string)
+ assert_equal 'protocol_documentation', original.doc
+ end
+
+ def test_protocol_message_doc_attribute
+ original = Protocol.parse(EXAMPLES.last.protocol_string)
+ assert_equal 'message_documentation', original.messages['hello'].doc
+ end
end
diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb
index 0668cf2f..ed1c8eef 100644
--- a/lang/ruby/test/test_schema.rb
+++ b/lang/ruby/test/test_schema.rb
@@ -17,6 +17,10 @@
require 'test_help'
class TestSchema < Test::Unit::TestCase
+ def hash_to_schema(hash)
+ Avro::Schema.parse(hash.to_json)
+ end
+
def test_default_namespace
schema = Avro::Schema.parse <<-SCHEMA
{"type": "record", "name": "OuterRecord", "fields": [
@@ -27,13 +31,13 @@ class TestSchema < Test::Unit::TestCase
]}
SCHEMA
- assert_equal schema.name, 'OuterRecord'
- assert_equal schema.fullname, 'OuterRecord'
+ assert_equal 'OuterRecord', schema.name
+ assert_equal 'OuterRecord', schema.fullname
assert_nil schema.namespace
schema.fields.each do |field|
- assert_equal field.type.name, 'InnerRecord'
- assert_equal field.type.fullname, 'InnerRecord'
+ assert_equal 'InnerRecord', field.type.name
+ assert_equal 'InnerRecord', field.type.fullname
assert_nil field.type.namespace
end
end
@@ -50,13 +54,13 @@ class TestSchema < Test::Unit::TestCase
]}
SCHEMA
- assert_equal schema.name, 'OuterRecord'
- assert_equal schema.fullname, 'my.name.space.OuterRecord'
- assert_equal schema.namespace, 'my.name.space'
+ assert_equal 'OuterRecord', schema.name
+ assert_equal 'my.name.space.OuterRecord', schema.fullname
+ assert_equal 'my.name.space', schema.namespace
schema.fields.each do |field|
- assert_equal field.type.name, 'InnerRecord'
- assert_equal field.type.fullname, 'my.name.space.InnerRecord'
- assert_equal field.type.namespace, 'my.name.space'
+ assert_equal 'InnerRecord', field.type.name
+ assert_equal 'my.name.space.InnerRecord', field.type.fullname
+ assert_equal 'my.name.space', field.type.namespace
end
end
@@ -71,13 +75,13 @@ class TestSchema < Test::Unit::TestCase
]}
SCHEMA
- assert_equal schema.name, 'OuterRecord'
- assert_equal schema.fullname, 'my.name.space.OuterRecord'
- assert_equal schema.namespace, 'my.name.space'
+ assert_equal 'OuterRecord', schema.name
+ assert_equal 'my.name.space.OuterRecord', schema.fullname
+ assert_equal 'my.name.space', schema.namespace
schema.fields.each do |field|
- assert_equal field.type.name, 'InnerEnum'
- assert_equal field.type.fullname, 'my.name.space.InnerEnum'
- assert_equal field.type.namespace, 'my.name.space'
+ assert_equal 'InnerEnum', field.type.name
+ assert_equal 'my.name.space.InnerEnum', field.type.fullname
+ assert_equal 'my.name.space', field.type.namespace
end
end
@@ -96,18 +100,18 @@ class TestSchema < Test::Unit::TestCase
]}
SCHEMA
- assert_equal schema.name, 'OuterRecord'
- assert_equal schema.fullname, 'outer.OuterRecord'
- assert_equal schema.namespace, 'outer'
+ assert_equal 'OuterRecord', schema.name
+ assert_equal 'outer.OuterRecord', schema.fullname
+ assert_equal 'outer', schema.namespace
middle = schema.fields.first.type
- assert_equal middle.name, 'MiddleRecord'
- assert_equal middle.fullname, 'middle.MiddleRecord'
- assert_equal middle.namespace, 'middle'
+ assert_equal 'MiddleRecord', middle.name
+ assert_equal 'middle.MiddleRecord', middle.fullname
+ assert_equal 'middle', middle.namespace
inner = middle.fields.first.type
- assert_equal inner.name, 'InnerRecord'
- assert_equal inner.fullname, 'middle.InnerRecord'
- assert_equal inner.namespace, 'middle'
- assert_equal inner.fields.first.type, middle
+ assert_equal 'InnerRecord', inner.name
+ assert_equal 'middle.InnerRecord', inner.fullname
+ assert_equal 'middle', inner.namespace
+ assert_equal middle, inner.fields.first.type
end
def test_to_avro_includes_namespaces
@@ -120,7 +124,7 @@ class TestSchema < Test::Unit::TestCase
]}
SCHEMA
- assert_equal schema.to_avro, {
+ assert_equal({
'type' => 'record', 'name' => 'OuterRecord', 'namespace' => 'my.name.space',
'fields' => [
{'name' => 'definition', 'type' => {
@@ -129,6 +133,21 @@ class TestSchema < Test::Unit::TestCase
}},
{'name' => 'reference', 'type' => 'my.name.space.InnerFixed'}
]
+ }, schema.to_avro)
+ end
+
+ def test_to_avro_includes_logical_type
+ schema = Avro::Schema.parse <<-SCHEMA
+ {"type": "record", "name": "has_logical", "fields": [
+ {"name": "dt", "type": {"type": "int", "logicalType": "date"}}]
+ }
+ SCHEMA
+
+ assert_equal schema.to_avro, {
+ 'type' => 'record', 'name' => 'has_logical',
+ 'fields' => [
+ {'name' => 'dt', 'type' => {'type' => 'int', 'logicalType' => 'date'}}
+ ]
}
end
@@ -143,4 +162,298 @@ class TestSchema < Test::Unit::TestCase
assert_equal '"MissingType" is not a schema we know about.', error.message
end
+
+ # A default of `false` is falsey but is still a declared default, so it
+ # has to be present in the hash produced by #to_avro.
+ def test_to_avro_handles_falsey_defaults
+ schema = Avro::Schema.parse <<-SCHEMA
+ {"type": "record", "name": "Record", "namespace": "my.name.space",
+ "fields": [
+ {"name": "is_usable", "type": "boolean", "default": false}
+ ]
+ }
+ SCHEMA
+
+ # Expected value goes first so a failure labels expected/actual correctly.
+ assert_equal({
+ 'type' => 'record', 'name' => 'Record', 'namespace' => 'my.name.space',
+ 'fields' => [
+ {'name' => 'is_usable', 'type' => 'boolean', 'default' => false}
+ ]
+ }, schema.to_avro)
+ end
+
+ # Parses a record whose single field carries a "doc" attribute and checks
+ # that #to_avro reproduces the field documentation.
+ def test_record_field_doc_attribute
+ parsed_schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "record",
+ "name": "Record",
+ "namespace": "my.name.space",
+ "fields": [
+ {
+ "name": "name",
+ "type": "boolean",
+ "doc": "documentation"
+ }
+ ]
+ }
+ SCHEMA
+
+ documented_field = { 'name' => 'name', 'type' => 'boolean', 'doc' => 'documentation' }
+ expected_avro = {
+ 'type' => 'record',
+ 'name' => 'Record',
+ 'namespace' => 'my.name.space',
+ 'fields' => [documented_field]
+ }
+
+ assert_equal(expected_avro, parsed_schema.to_avro)
+ end
+
+ # Parses a record that has a record-level "doc" attribute and checks that
+ # #to_avro reproduces it.
+ def test_record_doc_attribute
+ parsed_schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "record",
+ "name": "Record",
+ "namespace": "my.name.space",
+ "doc": "documentation",
+ "fields": [
+ {
+ "name": "name",
+ "type": "boolean"
+ }
+ ]
+ }
+ SCHEMA
+
+ plain_field = { 'name' => 'name', 'type' => 'boolean' }
+ expected_avro = {
+ 'type' => 'record',
+ 'name' => 'Record',
+ 'namespace' => 'my.name.space',
+ 'doc' => 'documentation',
+ 'fields' => [plain_field]
+ }
+
+ assert_equal(expected_avro, parsed_schema.to_avro)
+ end
+
+ # Parses an enum that has a "doc" attribute and checks that #to_avro
+ # reproduces it together with the symbols.
+ def test_enum_doc_attribute
+ parsed_schema = Avro::Schema.parse <<-SCHEMA
+ {
+ "type": "enum",
+ "name": "Enum",
+ "namespace": "my.name.space",
+ "doc": "documentation",
+ "symbols" : [
+ "SPADES",
+ "HEARTS",
+ "DIAMONDS",
+ "CLUBS"
+ ]
+ }
+ SCHEMA
+
+ expected_avro = {
+ 'type' => 'enum',
+ 'name' => 'Enum',
+ 'namespace' => 'my.name.space',
+ 'doc' => 'documentation',
+ 'symbols' => %w(SPADES HEARTS DIAMONDS CLUBS)
+ }
+
+ assert_equal(expected_avro, parsed_schema.to_avro)
+ end
+
+ # A record declared without a "fields" key parses and exposes no fields.
+ def test_empty_record
+ empty_record = Avro::Schema.parse('{"type":"record", "name":"Empty"}')
+ assert_empty empty_record.fields
+ end
+
+ # An empty union parses and serializes back to '[]'.
+ def test_empty_union
+ schema = Avro::Schema.parse('[]')
+ # Expected value first, matching the argument order used by the rest of this file.
+ assert_equal('[]', schema.to_s)
+ end
+
+ # Schema#read? is false for a string schema given an int writer schema and
+ # true when the schema is given itself.
+ def test_read
+ string_reader = Avro::Schema.parse('"string"')
+ int_writer = Avro::Schema.parse('"int"')
+
+ assert_false string_reader.read?(int_writer)
+ assert_true string_reader.read?(string_reader)
+ end
+
+ # Schema#be_read? is false for a string schema given an int schema and true
+ # when the schema is given itself.
+ def test_be_read
+ string_type = Avro::Schema.parse('"string"')
+ int_type = Avro::Schema.parse('"int"')
+
+ assert_false string_type.be_read?(int_type)
+ assert_true string_type.be_read?(string_type)
+ end
+
+ # Exercises Schema#mutual_read? with two incompatible primitives, a schema
+ # against itself, and two versions of a record whose fields all have defaults.
+ def test_mutual_read
+ schema = Avro::Schema.parse('"string"')
+ writer_schema = Avro::Schema.parse('"int"')
+ default1 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name":"i", "type":"int", "default": 1}]}')
+ # The field key is "name" (it was misspelled "name:"), so the field is really called "s".
+ default2 = Avro::Schema.parse('{"type":"record", "name":"Default", "fields":[{"name":"s", "type":"string", "default": ""}]}')
+ assert_false(schema.mutual_read?(writer_schema))
+ assert_true(schema.mutual_read?(schema))
+ assert_true(default1.mutual_read?(default2))
+ end
+
+ # A string field with a null default is rejected at parse time with a
+ # message that names the offending field.
+ def test_validate_defaults
+ null_default_field = { name: 'veggies', type: 'string', default: nil }
+
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(type: 'record', name: 'fruits', fields: [null_default_field])
+ end
+
+ expected_message = 'Error validating default for veggies: at . expected type string, got null'
+ assert_equal(expected_message, exception.to_s)
+ end
+
+ # With Avro.disable_field_default_validation set to true, a string field
+ # with a null default parses without raising. The flag is set back to
+ # false afterwards.
+ def test_field_default_validation_disabled
+ Avro.disable_field_default_validation = true
+ null_default_field = { name: 'veggies', type: 'string', default: nil }
+
+ assert_nothing_raised do
+ hash_to_schema(type: 'record', name: 'fruits', fields: [null_default_field])
+ end
+ ensure
+ Avro.disable_field_default_validation = false
+ end
+
+ # With AVRO_DISABLE_FIELD_DEFAULT_VALIDATION set to "1" in the environment,
+ # a string field with a null default parses without raising. The variable
+ # is removed and the flag set to false afterwards.
+ def test_field_default_validation_disabled_via_env
+ Avro.disable_field_default_validation = false
+ ENV['AVRO_DISABLE_FIELD_DEFAULT_VALIDATION'] = "1"
+ null_default_field = { name: 'veggies', type: 'string', default: nil }
+
+ assert_nothing_raised do
+ hash_to_schema(type: 'record', name: 'fruits', fields: [null_default_field])
+ end
+ ensure
+ ENV.delete('AVRO_DISABLE_FIELD_DEFAULT_VALIDATION')
+ Avro.disable_field_default_validation = false
+ end
+
+ # A record-typed field whose default supplies the sub-record's string
+ # field parses without a SchemaParseError.
+ def test_validate_record_valid_default
+ subrecord_type = {
+ name: 'subrecord',
+ type: 'record',
+ fields: [{ type: 'string', name: 'x' }]
+ }
+ sub_field = { name: 'sub', type: subrecord_type, default: { x: "y" } }
+
+ assert_nothing_raised(Avro::SchemaParseError) do
+ hash_to_schema(type: 'record', name: 'with_subrecord', fields: [sub_field])
+ end
+ end
+
+ # A record-typed field whose default omits the sub-record's string field
+ # is rejected, and the message points at the missing `.x` path.
+ def test_validate_record_invalid_default
+ subrecord_type = {
+ name: 'subrecord',
+ type: 'record',
+ fields: [{ type: 'string', name: 'x' }]
+ }
+ sub_field = { name: 'sub', type: subrecord_type, default: { a: 1 } }
+
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(type: 'record', name: 'with_subrecord', fields: [sub_field])
+ end
+
+ expected_message = 'Error validating default for sub: at .x expected type string, got null'
+ assert_equal(expected_message, exception.to_s)
+ end
+
+ # For a ["string", "null"] union an integer default is rejected, and the
+ # message reports the mismatch against the string branch.
+ def test_validate_union_defaults
+ union_field = { name: 'veggies', type: %w(string null), default: 5 }
+
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(type: 'record', name: 'fruits', fields: [union_field])
+ end
+
+ expected_message = 'Error validating default for veggies: at . expected type string, got int with value 5'
+ assert_equal(expected_message, exception.to_s)
+ end
+
+ # For a ["null", "string"] union a string default is rejected, and the
+ # message reports the mismatch against the null branch.
+ def test_validate_union_default_first_type
+ union_field = { name: 'veggies', type: %w(null string), default: 'apple' }
+
+ exception = assert_raise(Avro::SchemaParseError) do
+ hash_to_schema(type: 'record', name: 'fruits', fields: [union_field])
+ end
+
+ expected_message = 'Error validating default for veggies: at . expected type null, got string with value "apple"'
+ assert_equal(expected_message, exception.to_s)
+ end
end
diff --git a/lang/ruby/test/test_schema_compatibility.rb b/lang/ruby/test/test_schema_compatibility.rb
new file mode 100644
index 00000000..4ec60ef7
--- /dev/null
+++ b/lang/ruby/test/test_schema_compatibility.rb
@@ -0,0 +1,475 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+ # Tests for Avro::SchemaCompatibility.can_read?(writer, reader).
+ #
+ # The pair tables below are flat arrays read two entries at a time as
+ # (reader, writer). The schema fixtures are the helper methods defined
+ # after the tests.
+ class TestSchemaCompatibility < Test::Unit::TestCase
+
+ # Every primitive type can read data written with the same primitive type.
+ def test_primitive_schema_compatibility
+ Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
+ assert_true(can_read?(send("#{schema_type}_schema"), send("#{schema_type}_schema")))
+ end
+ end
+
+ # Each (reader, writer) pair listed here is expected to be readable.
+ def test_compatible_reader_writer_pairs
+ [
+ long_schema, int_schema,
+ float_schema, int_schema,
+ float_schema, long_schema,
+ double_schema, long_schema,
+ double_schema, int_schema,
+ double_schema, float_schema,
+
+ int_array_schema, int_array_schema,
+ long_array_schema, int_array_schema,
+ int_map_schema, int_map_schema,
+ long_map_schema, int_map_schema,
+
+ enum1_ab_schema, enum1_ab_schema,
+ enum1_abc_schema, enum1_ab_schema,
+
+ string_schema, bytes_schema,
+ bytes_schema, string_schema,
+
+ empty_union_schema, empty_union_schema,
+ int_union_schema, int_union_schema,
+ int_string_union_schema, string_int_union_schema,
+ int_union_schema, empty_union_schema,
+ long_union_schema, int_union_schema,
+
+ int_union_schema, int_schema,
+ int_schema, int_union_schema,
+
+ empty_record1_schema, empty_record1_schema,
+ empty_record1_schema, a_int_record1_schema,
+
+ a_int_record1_schema, a_int_record1_schema,
+ a_dint_record1_schema, a_int_record1_schema,
+ a_dint_record1_schema, a_dint_record1_schema,
+ a_int_record1_schema, a_dint_record1_schema,
+
+ a_long_record1_schema, a_int_record1_schema,
+
+ a_int_record1_schema, a_int_b_int_record1_schema,
+ a_dint_record1_schema, a_int_b_int_record1_schema,
+
+ a_int_b_dint_record1_schema, a_int_record1_schema,
+ a_dint_b_dint_record1_schema, empty_record1_schema,
+ a_dint_b_dint_record1_schema, a_int_record1_schema,
+ a_int_b_int_record1_schema, a_dint_b_dint_record1_schema,
+
+ int_list_record_schema, int_list_record_schema,
+ long_list_record_schema, long_list_record_schema,
+ long_list_record_schema, int_list_record_schema,
+
+ null_schema, null_schema,
+
+ nested_optional_record, nested_record
+ ].each_slice(2) do |(reader, writer)|
+ assert_true(can_read?(writer, reader), "expecting #{reader} to read #{writer}")
+ end
+ end
+
+ # An [int] union reader cannot read data written with an [int, string] union.
+ def test_broken
+ assert_false(can_read?(int_string_union_schema, int_union_schema))
+ end
+
+ # Each (reader, writer) pair listed here is expected NOT to be readable.
+ def test_incompatible_reader_writer_pairs
+ [
+ null_schema, int_schema,
+ null_schema, long_schema,
+
+ boolean_schema, int_schema,
+
+ int_schema, null_schema,
+ int_schema, boolean_schema,
+ int_schema, long_schema,
+ int_schema, float_schema,
+ int_schema, double_schema,
+
+ long_schema, float_schema,
+ long_schema, double_schema,
+
+ float_schema, double_schema,
+
+ string_schema, boolean_schema,
+ string_schema, int_schema,
+
+ bytes_schema, null_schema,
+ bytes_schema, int_schema,
+
+ int_array_schema, long_array_schema,
+ int_map_schema, int_array_schema,
+ int_array_schema, int_map_schema,
+ int_map_schema, long_map_schema,
+
+ enum1_ab_schema, enum1_abc_schema,
+ enum1_bc_schema, enum1_abc_schema,
+
+ enum1_ab_schema, enum2_ab_schema,
+ int_schema, enum2_ab_schema,
+ enum2_ab_schema, int_schema,
+
+ int_union_schema, int_string_union_schema,
+ string_union_schema, int_string_union_schema,
+
+ empty_record2_schema, empty_record1_schema,
+ a_int_record1_schema, empty_record1_schema,
+ a_int_b_dint_record1_schema, empty_record1_schema,
+
+ int_list_record_schema, long_list_record_schema,
+
+ nested_record, nested_optional_record
+ ].each_slice(2) do |(reader, writer)|
+ assert_false(can_read?(writer, reader), "expecting #{reader} not to read #{writer}")
+ end
+ end
+
+ # Two-field record used as the writer side of the record-evolution tests below.
+ def writer_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield1", "type":"int"},
+ {"name":"oldfield2", "type":"string"}
+ ]}
+ SCHEMA
+ end
+
+ # A reader that keeps only the first field can read the writer, but not the other way round.
+ def test_missing_field
+ reader_schema = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield1", "type":"int"}
+ ]}
+ SCHEMA
+ assert_true(can_read?(writer_schema, reader_schema))
+ assert_false(can_read?(reader_schema, writer_schema))
+ end
+
+ # A reader that keeps only the second field can read the writer, but not the other way round.
+ def test_missing_second_field
+ reader_schema = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield2", "type":"string"}
+ ]}
+ SCHEMA
+ assert_true(can_read?(writer_schema, reader_schema))
+ assert_false(can_read?(reader_schema, writer_schema))
+ end
+
+ # Identical field lists are readable in both directions.
+ def test_all_fields
+ reader_schema = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield1", "type":"int"},
+ {"name":"oldfield2", "type":"string"}
+ ]}
+ SCHEMA
+ assert_true(can_read?(writer_schema, reader_schema))
+ assert_true(can_read?(reader_schema, writer_schema))
+ end
+
+ # A reader-only field that has a default does not prevent reading the writer.
+ def test_new_field_with_default
+ reader_schema = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield1", "type":"int"},
+ {"name":"newfield1", "type":"int", "default":42}
+ ]}
+ SCHEMA
+ assert_true(can_read?(writer_schema, reader_schema))
+ assert_false(can_read?(reader_schema, writer_schema))
+ end
+
+ # A reader-only field without a default makes the schemas unreadable in both directions.
+ def test_new_field
+ reader_schema = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Record", "fields":[
+ {"name":"oldfield1", "type":"int"},
+ {"name":"newfield1", "type":"int"}
+ ]}
+ SCHEMA
+ assert_false(can_read?(writer_schema, reader_schema))
+ assert_false(can_read?(reader_schema, writer_schema))
+ end
+
+ # A string array is readable by a string array reader but not by a string map reader.
+ def test_array_writer_schema
+ valid_reader = string_array_schema
+ invalid_reader = string_map_schema
+
+ assert_true(can_read?(string_array_schema, valid_reader))
+ assert_false(can_read?(string_array_schema, invalid_reader))
+ end
+
+ # A string reader can read a string writer but not an int writer.
+ def test_primitive_writer_schema
+ valid_reader = string_schema
+ assert_true(can_read?(string_schema, valid_reader))
+ assert_false(can_read?(int_schema, string_schema))
+ end
+
+ def test_union_reader_writer_subset_incompatiblity
+ # reader union schema must contain all writer union branches
+ union_writer = union_schema(int_schema, string_schema)
+ union_reader = union_schema(string_schema)
+
+ assert_false(can_read?(union_writer, union_reader))
+ assert_true(can_read?(union_reader, union_writer))
+ end
+
+ # Records with the same full name but a string vs. int field of the same name are not readable.
+ def test_incompatible_record_field
+ # Locals are named so they do not shadow the string_schema / int_schema helpers.
+ string_field_record = Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
+ {"name":"field1", "type":"string"}
+ ]}
+ SCHEMA
+ int_field_record = Avro::Schema.parse <<-SCHEMA2
+ {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [
+ {"name":"field1", "type":"int"}
+ ]}
+ SCHEMA2
+ assert_false(can_read?(string_field_record, int_field_record))
+ end
+
+ # A reader enum with fewer symbols cannot read the larger writer enum; the reverse works.
+ def test_enum_symbols
+ enum_schema1 = Avro::Schema.parse <<-SCHEMA
+ {"type":"enum", "name":"MyEnum", "symbols":["A","B"]}
+ SCHEMA
+ enum_schema2 = Avro::Schema.parse <<-SCHEMA
+ {"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}
+ SCHEMA
+ assert_false(can_read?(enum_schema2, enum_schema1))
+ assert_true(can_read?(enum_schema1, enum_schema2))
+ end
+
+ # Tests from lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator2.java
+
+ def point_2d_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Point2D", "fields":[
+ {"name":"x", "type":"double"},
+ {"name":"y", "type":"double"}
+ ]}
+ SCHEMA
+ end
+
+ def point_2d_fullname_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Point", "namespace":"written", "fields":[
+ {"name":"x", "type":"double"},
+ {"name":"y", "type":"double"}
+ ]}
+ SCHEMA
+ end
+
+ def point_3d_no_default_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Point", "fields":[
+ {"name":"x", "type":"double"},
+ {"name":"y", "type":"double"},
+ {"name":"z", "type":"double"}
+ ]}
+ SCHEMA
+ end
+
+ def point_3d_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Point3D", "fields":[
+ {"name":"x", "type":"double"},
+ {"name":"y", "type":"double"},
+ {"name":"z", "type":"double", "default": 0.0}
+ ]}
+ SCHEMA
+ end
+
+ def point_3d_match_name_schema
+ Avro::Schema.parse <<-SCHEMA
+ {"type":"record", "name":"Point", "fields":[
+ {"name":"x", "type":"double"},
+ {"name":"y", "type":"double"},
+ {"name":"z", "type":"double", "default": 0.0}
+ ]}
+ SCHEMA
+ end
+
+ def test_union_resolution_no_structure_match
+ # short name match, but no structure match
+ read_schema = union_schema(null_schema, point_3d_no_default_schema)
+ assert_false(can_read?(point_2d_fullname_schema, read_schema))
+ end
+
+ def test_union_resolution_first_structure_match_2d
+ # multiple structure matches with no name matches
+ read_schema = union_schema(null_schema, point_3d_no_default_schema, point_2d_schema, point_3d_schema)
+ assert_false(can_read?(point_2d_fullname_schema, read_schema))
+ end
+
+ def test_union_resolution_first_structure_match_3d
+ # multiple structure matches with no name matches
+ read_schema = union_schema(null_schema, point_3d_no_default_schema, point_3d_schema, point_2d_schema)
+ assert_false(can_read?(point_2d_fullname_schema, read_schema))
+ end
+
+ def test_union_resolution_named_structure_match
+ # multiple structure matches with a short name match
+ read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema)
+ assert_false(can_read?(point_2d_fullname_schema, read_schema))
+ end
+
+ def test_union_resolution_full_name_match
+ # there is a full name match that should be chosen
+ read_schema = union_schema(null_schema, point_2d_schema, point_3d_match_name_schema, point_3d_schema, point_2d_fullname_schema)
+ assert_true(can_read?(point_2d_fullname_schema, read_schema))
+ end
+
+ # Shorthand for Avro::SchemaCompatibility.can_read?; note the (writer, reader) argument order.
+ def can_read?(writer, reader)
+ Avro::SchemaCompatibility.can_read?(writer, reader)
+ end
+
+ # Builds a union schema from the given branch schemas; with no arguments the result is the empty union.
+ def union_schema(*schemas)
+ # A splat parameter is always an Array (empty when no arguments are given), so no nil guard is needed.
+ Avro::Schema.parse("[#{schemas.map(&:to_s).join(',')}]")
+ end
+
+ # Defines one `<type>_schema` helper per primitive type (int_schema, string_schema, ...).
+ Avro::Schema::PRIMITIVE_TYPES.each do |schema_type|
+ define_method("#{schema_type}_schema") do
+ Avro::Schema.parse("\"#{schema_type}\"")
+ end
+ end
+
+ def int_array_schema
+ Avro::Schema.parse('{"type":"array", "items":"int"}')
+ end
+
+ def long_array_schema
+ Avro::Schema.parse('{"type":"array", "items":"long"}')
+ end
+
+ def string_array_schema
+ Avro::Schema.parse('{"type":"array", "items":"string"}')
+ end
+
+ def int_map_schema
+ Avro::Schema.parse('{"type":"map", "values":"int"}')
+ end
+
+ def long_map_schema
+ Avro::Schema.parse('{"type":"map", "values":"long"}')
+ end
+
+ def string_map_schema
+ Avro::Schema.parse('{"type":"map", "values":"string"}')
+ end
+
+ def enum1_ab_schema
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B"]}')
+ end
+
+ def enum1_abc_schema
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["A","B","C"]}')
+ end
+
+ def enum1_bc_schema
+ Avro::Schema.parse('{"type":"enum", "name":"Enum1", "symbols":["B","C"]}')
+ end
+
+ def enum2_ab_schema
+ Avro::Schema.parse('{"type":"enum", "name":"Enum2", "symbols":["A","B"]}')
+ end
+
+ def empty_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1"}')
+ end
+
+ def empty_record2_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record2"}')
+ end
+
+ # Naming of the Record1 fixtures: "a_int" is field a of type int, "dint" is an int field with default 0.
+ def a_int_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}]}')
+ end
+
+ def a_long_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"long"}]}')
+ end
+
+ def a_int_b_int_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int"}]}')
+ end
+
+ def a_dint_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}]}')
+ end
+
+ def a_int_b_dint_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int"}, {"name":"b", "type":"int", "default":0}]}')
+ end
+
+ def a_dint_b_dint_record1_schema
+ Avro::Schema.parse('{"type":"record", "name":"Record1", "fields":[{"name":"a", "type":"int", "default":0}, {"name":"b", "type":"int", "default":0}]}')
+ end
+
+ def nested_record
+ Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}}]}')
+ end
+
+ def nested_optional_record
+ Avro::Schema.parse('{"type":"record","name":"parent","fields":[{"name":"attribute","type":["null",{"type":"record","name":"child","fields":[{"name":"id","type":"string"}]}],"default":null}]}')
+ end
+
+ # Self-referential record: "tail" refers back to the enclosing List type.
+ def int_list_record_schema
+ Avro::Schema.parse <<-SCHEMA
+ {
+ "type":"record", "name":"List", "fields": [
+ {"name": "head", "type": "int"},
+ {"name": "tail", "type": "List"}
+ ]}
+ SCHEMA
+ end
+
+ def long_list_record_schema
+ Avro::Schema.parse <<-SCHEMA
+ {
+ "type":"record", "name":"List", "fields": [
+ {"name": "head", "type": "long"},
+ {"name": "tail", "type": "List"}
+ ]}
+ SCHEMA
+ end
+
+ def empty_union_schema
+ union_schema
+ end
+
+ def null_union_schema
+ union_schema(null_schema)
+ end
+
+ def int_union_schema
+ union_schema(int_schema)
+ end
+
+ def long_union_schema
+ union_schema(long_schema)
+ end
+
+ def string_union_schema
+ union_schema(string_schema)
+ end
+
+ def int_string_union_schema
+ union_schema(int_schema, string_schema)
+ end
+
+ def string_int_union_schema
+ union_schema(string_schema, int_schema)
+ end
+ end
diff --git a/lang/ruby/test/test_schema_validator.rb b/lang/ruby/test/test_schema_validator.rb
new file mode 100644
index 00000000..0126c354
--- /dev/null
+++ b/lang/ruby/test/test_schema_validator.rb
@@ -0,0 +1,554 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+require 'test_help'
+
+class TestSchema < Test::Unit::TestCase
+ # Test shorthand for Avro::SchemaValidator.validate!; +options+ is passed
+ # through unchanged (nil when the caller omits it).
+ def validate!(schema, value, options=nil)
+ Avro::SchemaValidator.validate!(schema, value, options)
+ end
+
+ # Test shorthand that calls Avro::SchemaValidator.validate! with
+ # recursive: false.
+ def validate_simple!(schema, value)
+ Avro::SchemaValidator.validate!(schema, value, recursive: false)
+ end
+
+ # Serializes +hash+ with #to_json and parses the resulting JSON text with
+ # Avro::Schema.parse.
+ def hash_to_schema(hash)
+ Avro::Schema.parse(hash.to_json)
+ end
+
+ # Asserts that the block raises Avro::SchemaValidator::ValidationError and
+ # that the error's result contains every message in +messages+ (a single
+ # message or an array) and no additional errors.
+ def assert_failed_validation(messages)
+ raised = assert_raise(Avro::SchemaValidator::ValidationError) { yield }
+
+ expected_messages = [messages].flatten
+ reported = raised.result.errors
+ expected_messages.each do |expected|
+ assert(reported.include?(expected), "expected '#{expected}' to be in '#{reported}'")
+ end
+ assert_equal(expected_messages.size, reported.size)
+ end
+
+ # Checks a batch of values against +schema+.
+ #
+ # Every entry of +valid+ must pass full validation, and also non-recursive
+ # validation when +simple+ is true. Every entry of +invalid+ must fail full
+ # validation. Non-recursive validation of an invalid entry must fail when
+ # +simple+ is true and must pass when it is false.
+ def assert_valid_schema(schema, valid, invalid, simple = false)
+ valid.each do |value|
+ assert_nothing_raised { validate!(schema, value) }
+ assert_nothing_raised { validate_simple!(schema, value) } if simple
+ end
+
+ invalid.each do |value|
+ # Pin the exception class so an unrelated error (e.g. NoMethodError)
+ # cannot make an "invalid" case pass by accident.
+ assert_raise(Avro::SchemaValidator::ValidationError) { validate!(schema, value) }
+ if simple
+ assert_raise(Avro::SchemaValidator::ValidationError) { validate_simple!(schema, value) }
+ else
+ assert_nothing_raised { validate_simple!(schema, value) }
+ end
+ end
+ end
+
+ # A null schema accepts nil and rejects an integer, in both validation modes.
+ def test_validate_nil
+ null_schema = hash_to_schema(type: 'null', name: 'name')
+ mismatch = 'at . expected type null, got int with value 1'
+
+ assert_nothing_raised { validate!(null_schema, nil) }
+ assert_nothing_raised { validate_simple!(null_schema, nil) }
+
+ assert_failed_validation(mismatch) { validate!(null_schema, 1) }
+ assert_failed_validation(mismatch) { validate_simple!(null_schema, 1) }
+ end
+
+ # A boolean schema accepts true/false and rejects an integer and nil, in
+ # both validation modes.
+ def test_validate_boolean
+ boolean_schema = hash_to_schema(type: 'boolean', name: 'name')
+ got_int = 'at . expected type boolean, got int with value 1'
+ got_null = 'at . expected type boolean, got null'
+
+ assert_nothing_raised { validate!(boolean_schema, true) }
+ assert_nothing_raised { validate!(boolean_schema, false) }
+ assert_nothing_raised { validate_simple!(boolean_schema, true) }
+ assert_nothing_raised { validate_simple!(boolean_schema, false) }
+
+ assert_failed_validation(got_int) { validate!(boolean_schema, 1) }
+ assert_failed_validation(got_int) { validate_simple!(boolean_schema, 1) }
+
+ assert_failed_validation(got_null) { validate!(boolean_schema, nil) }
+ assert_failed_validation(got_null) { validate_simple!(boolean_schema, nil) }
+ end
+
+ # A fixed(3) schema accepts a three-byte string and rejects a longer
+ # string, nil, and a three-character string that the error message
+ # reports as size 5.
+ def test_fixed_size_string
+ fixed_schema = hash_to_schema(type: 'fixed', name: 'some', size: 3)
+ too_long = 'at . expected fixed with size 3, got "some" with size 4'
+ got_null = 'at . expected fixed with size 3, got null'
+ multibyte = "at . expected fixed with size 3, got \"a\u2014b\" with size 5"
+
+ assert_nothing_raised { validate!(fixed_schema, 'baf') }
+ assert_nothing_raised { validate_simple!(fixed_schema, 'baf') }
+
+ assert_failed_validation(too_long) { validate!(fixed_schema, 'some') }
+ assert_failed_validation(too_long) { validate_simple!(fixed_schema, 'some') }
+
+ assert_failed_validation(got_null) { validate!(fixed_schema, nil) }
+ assert_failed_validation(got_null) { validate_simple!(fixed_schema, nil) }
+
+ assert_failed_validation(multibyte) { validate!(fixed_schema, "a\u2014b") }
+ assert_failed_validation(multibyte) { validate_simple!(fixed_schema, "a\u2014b") }
+ end
+
+ # Null schema: nil is valid, a string is invalid (checked in both modes).
+ def test_original_validate_nil
+ null_schema = hash_to_schema(type: 'null', name: 'name')
+ accepted = [nil]
+ rejected = ['something']
+
+ assert_valid_schema(null_schema, accepted, rejected, true)
+ end
+
+ # Boolean schema: true/false are valid, nil and 1 are invalid (checked in both modes).
+ def test_original_validate_boolean
+ boolean_schema = hash_to_schema(type: 'boolean', name: 'name')
+ accepted = [true, false]
+ rejected = [nil, 1]
+
+ assert_valid_schema(boolean_schema, accepted, rejected, true)
+ end
+
+ # String schema: a string is valid, nil and 1 are invalid (checked in both modes).
+ def test_validate_string
+ string_schema = hash_to_schema(type: 'string', name: 'name')
+ accepted = ['string']
+ rejected = [nil, 1]
+
+ assert_valid_schema(string_schema, accepted, rejected, true)
+ end
+
+ # Bytes schema: a string is valid, nil and 1 are invalid (checked in both modes).
+ def test_validate_bytes
+ bytes_schema = hash_to_schema(type: 'bytes', name: 'name')
+ accepted = ['string']
+ rejected = [nil, 1]
+
+ assert_valid_schema(bytes_schema, accepted, rejected, true)
+ end
+
+ # Int schema: the int bounds and 1 are valid; the long bounds and a string
+ # are invalid. LONG_MAX_VALUE is reported as an out-of-bound value.
+ def test_validate_int
+ int_schema = hash_to_schema(type: 'int', name: 'name')
+ accepted = [Avro::Schema::INT_MIN_VALUE, Avro::Schema::INT_MAX_VALUE, 1]
+ rejected = [Avro::Schema::LONG_MIN_VALUE, Avro::Schema::LONG_MAX_VALUE, 'string']
+ out_of_bound = 'at . out of bound value 9223372036854775807'
+
+ assert_valid_schema(int_schema, accepted, rejected, true)
+
+ assert_failed_validation(out_of_bound) { validate!(int_schema, Avro::Schema::LONG_MAX_VALUE) }
+ assert_failed_validation(out_of_bound) { validate_simple!(int_schema, Avro::Schema::LONG_MAX_VALUE) }
+ end
+
+ # Long schema: the long bounds and 1 are valid; a float and a string are invalid.
+ def test_validate_long
+ long_schema = hash_to_schema(type: 'long', name: 'name')
+ accepted = [Avro::Schema::LONG_MIN_VALUE, Avro::Schema::LONG_MAX_VALUE, 1]
+ rejected = [1.1, 'string']
+
+ assert_valid_schema(long_schema, accepted, rejected, true)
+ end
+
+ # Float schema: floats and integers (including LONG_MAX_VALUE) are valid; a string is invalid.
+ def test_validate_float
+ float_schema = hash_to_schema(type: 'float', name: 'name')
+ accepted = [1.1, 1, Avro::Schema::LONG_MAX_VALUE]
+ rejected = ['string']
+
+ assert_valid_schema(float_schema, accepted, rejected, true)
+ end
+
+ # Double schema: floats and integers (including LONG_MAX_VALUE) are valid; a string is invalid.
+ def test_validate_double
+ double_schema = hash_to_schema(type: 'double', name: 'name')
+ accepted = [1.1, 1, Avro::Schema::LONG_MAX_VALUE]
+ rejected = ['string']
+
+ assert_valid_schema(double_schema, accepted, rejected, true)
+ end
+
+ # Fixed(3) schema: 'abc' is valid; a shorter string, numbers and true are invalid.
+ def test_validate_fixed
+ fixed_schema = hash_to_schema(type: 'fixed', name: 'name', size: 3)
+ accepted = ['abc']
+ rejected = ['ab', 1, 1.1, true]
+
+ assert_valid_schema(fixed_schema, accepted, rejected, true)
+ end
+
+ # Enum schema with symbols a and b: both symbols are valid, 'c' is invalid.
+ def test_validate_original_num
+ enum_schema = hash_to_schema(type: 'enum', name: 'name', symbols: %w(a b))
+ accepted = ['a', 'b']
+ rejected = ['c']
+
+ assert_valid_schema(enum_schema, accepted, rejected, true)
+ end
+
+ # Record with one null field: a nil value is valid; an integer value fails
+ # full validation but passes non-recursive validation (simple defaults to false).
+ def test_validate_record
+ null_field = { type: 'null', name: 'sub' }
+ record_schema = hash_to_schema(type: 'record', name: 'name', fields: [null_field])
+
+ assert_valid_schema(record_schema, [{ 'sub' => nil }], [{ 'sub' => 1 }])
+ end
+
+ # Record with one int field: full validation reports a missing field, a
+ # non-record datum and a wrongly typed field, while non-recursive
+ # validation accepts all three.
+ def test_validate_shallow_record
+ record_schema = hash_to_schema(
+ type: 'record', name: 'name', fields: [{ type: 'int', name: 'sub' }]
+ )
+ well_formed = { 'sub' => 1 }
+ float_sub = { 'sub' => 1.2 }
+
+ assert_nothing_raised { validate!(record_schema, well_formed) }
+ assert_nothing_raised { validate_simple!(record_schema, well_formed) }
+
+ assert_failed_validation('at .sub expected type int, got null') { validate!(record_schema, {}) }
+ assert_nothing_raised { validate_simple!(record_schema, {}) }
+
+ assert_failed_validation('at . expected type record, got float with value 1.2') { validate!(record_schema, 1.2) }
+ assert_nothing_raised { validate_simple!(record_schema, 1.2) }
+
+ assert_failed_validation('at .sub expected type int, got float with value 1.2') { validate!(record_schema, float_sub) }
+ assert_nothing_raised { validate_simple!(record_schema, float_sub) }
+ end
+
+ # Array of int: full validation reports nil, a nil element and a string
+ # element (with its index), while non-recursive validation accepts them.
+ def test_validate_array
+ array_schema = hash_to_schema(type: 'array',
+ name: 'person',
+ items: [{ type: 'int', name: 'height' }])
+ mixed_items = [1, 3, 9, 'so wrong']
+
+ assert_nothing_raised { validate!(array_schema, []) }
+ assert_nothing_raised { validate_simple!(array_schema, []) }
+
+ assert_failed_validation('at . expected type array, got null') { validate!(array_schema, nil) }
+ assert_nothing_raised { validate_simple!(array_schema, nil) }
+
+ assert_failed_validation('at .[0] expected type int, got null') { validate!(array_schema, [nil]) }
+ assert_nothing_raised { validate_simple!(array_schema, [nil]) }
+
+ assert_failed_validation('at .[3] expected type int, got string with value "so wrong"') { validate!(array_schema, mixed_items) }
+ assert_nothing_raised { validate_simple!(array_schema, mixed_items) }
+ end
+
+ # Enum one/two/three: a listed symbol is valid; an unlisted one is rejected
+ # in both validation modes with a message that lists the allowed values.
+ def test_validate_enum
+ enum_schema = hash_to_schema(type: 'enum',
+ name: 'person',
+ symbols: %w(one two three))
+ unknown_symbol = 'at . expected enum with values ["one", "two", "three"], got string with value "five"'
+
+ assert_nothing_raised { validate!(enum_schema, 'one') }
+ assert_nothing_raised { validate_simple!(enum_schema, 'one') }
+
+ assert_failed_validation(unknown_symbol) { validate!(enum_schema, 'five') }
+ assert_failed_validation(unknown_symbol) { validate_simple!(enum_schema, 'five') }
+ end
+
+ # A record field typed as a long/string union rejects nil under full
+ # validation; non-recursive validation accepts the record.
+ def test_validate_union_on_primitive_types
+ union_field = { name: 'what_ever', type: %w(long string) }
+ record_schema = hash_to_schema(name: 'should_not_matter', type: 'record', fields: [union_field])
+ nil_datum = { 'what_ever' => nil }
+
+ assert_failed_validation('at .what_ever expected union of [\'long\', \'string\'], got null') do
+ validate!(record_schema, nil_datum)
+ end
+ assert_nothing_raised { validate_simple!(record_schema, nil_datum) }
+ end
+
+ # Nested case: record -> record -> union(null, array of records).
+ # Full validation reports the exact path of a bad nested value, while
+ # non-recursive validation accepts the same datum.
+ def test_validate_union_of_nil_and_record_inside_array
+ house_record = {
+ name: 'house_entry',
+ type: 'record',
+ fields: [{ name: 'number_of_rooms', type: 'long' }]
+ }
+ houses_array = { name: 'houses_entry', type: 'array', items: [house_record] }
+ person_record = {
+ name: 'person_entry',
+ type: 'record',
+ fields: [{ name: 'houses', type: ['null', houses_array] }]
+ }
+ schema = hash_to_schema(
+ name: 'this does not matter',
+ type: 'record',
+ fields: [{ name: 'person', type: person_record }]
+ )
+
+ assert_failed_validation('at .person expected type record, got null') do
+ validate!(schema, { 'not at all' => nil })
+ end
+ assert_nothing_raised { validate_simple!(schema, { 'person' => {} }) }
+
+ valid_people = [
+ {},
+ { houses: [] },
+ { 'houses' => [{ 'number_of_rooms' => 1 }] }
+ ]
+ valid_people.each do |person|
+ assert_nothing_raised { validate!(schema, { 'person' => person }) }
+ end
+ valid_people.each do |person|
+ assert_nothing_raised { validate_simple!(schema, { 'person' => person }) }
+ end
+
+ bad_rooms = {
+ 'person' => {
+ 'houses' => [
+ { 'number_of_rooms' => 2 },
+ { 'number_of_rooms' => 'not valid at all' }
+ ]
+ }
+ }
+ expected_error = 'at .person.houses[1].number_of_rooms expected type long, got string with value "not valid at all"'
+ assert_failed_validation(expected_error) { validate!(schema, bad_rooms) }
+ assert_nothing_raised { validate_simple!(schema, bad_rooms) }
+ end
+
+ def test_validate_map
+ schema = hash_to_schema(type: 'map',
+ name: 'numbers',
+ values: [
+ { name: 'some', type: 'int' }
+ ])
+
+ assert_nothing_raised { validate!(schema, 'some' => 1) }
+ assert_nothing_raised { validate_simple!(schema, 'some' => 1) }
+
+ assert_failed_validation('at .some expected type int, got string with value "nope"') do
+ validate!(schema, 'some' => 'nope')
+ end
+ assert_nothing_raised { validate_simple!(schema, 'some' => 'nope')}
+
+ assert_failed_validation("at . unexpected key type 'Symbol' in map") do
+ validate!(schema, some: 1)
+ end
+ assert_nothing_raised { validate_simple!(schema, some: 1) }
+
+ assert_failed_validation('at . expected type map, got null') do
+ validate!(schema, nil)
+ end
+ assert_nothing_raised { validate_simple!(schema, nil) }
+ end
+
+ def test_validate_deep_record
+ schema = hash_to_schema(type: 'record',
+ name: 'person',
+ fields: [
+ {
+ name: 'head',
+ type: {
+ name: 'head',
+ type: 'record',
+ fields: [
+ {
+ name: 'hair',
+ type: {
+ name: 'hair',
+ type: 'record',
+ fields: [
+ {
+ name: 'color',
+ type: 'string'
+ }
+ ]
+ }
+ }
+ ]
+ }
+ }
+ ])
+
+ assert_nothing_raised { validate!(schema, 'head' => { 'hair' => { 'color' => 'black' } }) }
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => { 'color' => 'black' } }) }
+
+ assert_failed_validation('at .head.hair.color expected type string, got null') do
+ validate!(schema, 'head' => { 'hair' => { 'color' => nil } })
+ end
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => { 'color' => nil } }) }
+
+ assert_failed_validation('at .head.hair.color expected type string, got null') do
+ validate!(schema, 'head' => { 'hair' => {} })
+ end
+ assert_nothing_raised { validate_simple!(schema, 'head' => { 'hair' => {} }) }
+
+ assert_failed_validation('at .head.hair expected type record, got null') do
+ validate!(schema, 'head' => {})
+ end
+ assert_nothing_raised { validate_simple!(schema, 'head' => {}) }
+
+ assert_failed_validation('at . expected type record, got null') do
+ validate!(schema, nil)
+ end
+ assert_nothing_raised { validate_simple!(schema, nil) }
+ end
+
+ def test_validate_deep_record_with_array
+ schema = hash_to_schema(type: 'record',
+ name: 'fruits',
+ fields: [
+ {
+ name: 'fruits',
+ type: {
+ name: 'fruits',
+ type: 'array',
+ items: [
+ {
+ name: 'fruit',
+ type: 'record',
+ fields: [
+ { name: 'name', type: 'string' },
+ { name: 'weight', type: 'float' }
+ ]
+ }
+ ]
+ }
+ }
+ ])
+ assert_nothing_raised { validate!(schema, 'fruits' => [{ 'name' => 'apple', 'weight' => 30.2 }]) }
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => [{ 'name' => 'apple', 'weight' => 30.2 }]) }
+
+ assert_failed_validation('at .fruits[0].name expected type string, got null') do
+ validate!(schema, 'fruits' => [{ 'name' => nil, 'weight' => 30.2 }])
+ end
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => [{ 'name' => nil, 'weight' => 30.2 }]) }
+
+ assert_failed_validation('at .fruits expected type array, got int with value 1') do
+ validate!(schema, 'fruits' => 1)
+ end
+ assert_nothing_raised { validate_simple!(schema, 'fruits' => 1) }
+ end
+
+ def test_validate_multiple_errors
+ schema = hash_to_schema(type: 'array',
+ name: 'ages',
+ items: [
+ { type: 'int', name: 'age' }
+ ])
+
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, [nil, 'e'])
+ end
+ assert_nothing_raised { validate_simple!(schema, [nil, 'e']) }
+ assert_equal 2, exception.result.errors.size
+ assert_equal(
+ "at .[0] expected type int, got null\nat .[1] expected type int, got string with value \"e\"",
+ exception.to_s
+ )
+ end
+
+ def test_validate_extra_fields
+ schema = hash_to_schema(
+ type: 'record',
+ name: 'fruits',
+ fields: [
+ {
+ name: 'veggies',
+ type: 'string'
+ }
+ ]
+ )
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, {'veggies' => 'tomato', 'bread' => 'rye'}, fail_on_extra_fields: true)
+ end
+ assert_equal(1, exception.result.errors.size)
+ assert_equal("at . extra field 'bread' - not in schema",
+ exception.to_s)
+ end
+
+ def test_validate_subrecord_extra_fields
+ schema = hash_to_schema(type: 'record',
+ name: 'top',
+ fields: [
+ {
+ name: 'fruit',
+ type: {
+ name: 'fruit',
+ type: 'record',
+ fields: [{ name: 'name', type: 'string' }]
+ }
+ }
+ ])
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, { 'fruit' => { 'name' => 'orange', 'color' => 'orange' } }, fail_on_extra_fields: true)
+ end
+ assert_equal(1, exception.result.errors.size)
+ assert_equal("at .fruit extra field 'color' - not in schema", exception.to_s)
+ end
+
+ def test_validate_array_extra_fields
+ schema = hash_to_schema(type: 'array',
+ items: {
+ name: 'fruit',
+ type: 'record',
+ fields: [{ name: 'name', type: 'string' }]
+ })
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, [{ 'name' => 'orange', 'color' => 'orange' }], fail_on_extra_fields: true)
+ end
+ assert_equal(1, exception.result.errors.size)
+ assert_equal("at .[0] extra field 'color' - not in schema", exception.to_s)
+ end
+
+ def test_validate_map_extra_fields
+ schema = hash_to_schema(type: 'map',
+ values: {
+ name: 'fruit',
+ type: 'record',
+ fields: [{ name: 'color', type: 'string' }]
+ })
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, { 'apple' => { 'color' => 'green', 'extra' => 1 } }, fail_on_extra_fields: true)
+ end
+ assert_equal(1, exception.result.errors.size)
+ assert_equal("at .apple extra field 'extra' - not in schema", exception.to_s)
+ end
+
+ def test_validate_union_extra_fields
+ schema = hash_to_schema([
+ 'null',
+ {
+ type: 'record',
+ name: 'fruit',
+ fields: [{ name: 'name', type: 'string' }]
+ }
+ ])
+ exception = assert_raise(Avro::SchemaValidator::ValidationError) do
+ validate!(schema, { 'name' => 'apple', 'color' => 'green' }, fail_on_extra_fields: true)
+ end
+ assert_equal(1, exception.result.errors.size)
+ assert_equal("at . extra field 'color' - not in schema", exception.to_s)
+ end
+end
diff --git a/lang/ruby/test/tool.rb b/lang/ruby/test/tool.rb
index 111b9d77..1a1f12ee 100644
--- a/lang/ruby/test/tool.rb
+++ b/lang/ruby/test/tool.rb
@@ -42,7 +42,6 @@ class GenericHandler < WEBrick::HTTPServlet::AbstractServlet
writer = Avro::IPC::FramedWriter.new(StringIO.new)
writer.write_framed_message(unframed_resp)
resp.body = writer.to_s
- @server.stop
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment