Created
February 29, 2024 11:39
-
-
Save amirrajan/c6329284929c2b5af4ddffc6fc84b631 to your computer and use it in GitHub Desktop.
Pure Ruby Marshal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Serializes a Ruby object graph into the Marshal 4.8 binary format.
#
# Handles nil, true, false, Integer, Symbol, String, Array, Hash, Float,
# Class, Module, Struct and plain objects (instance variables only).
# Objects that were already written are emitted as '@' links and repeated
# symbols as ';' links, so shared references and cyclic structures terminate.
#
# Usage: MarshalWriteBuffer.new(value).write # => binary String
class MarshalWriteBuffer
  attr_reader :object, :data

  # Integers inside this range use the compact 'i' record; anything outside
  # is written as an 'l' (arbitrary precision) record.
  FIXNUM_MIN = -(2**30)
  FIXNUM_MAX = 2**30 - 1

  # @param object [Object] root of the object graph to serialize
  def initialize(object)
    @object = object
    @data = binary("\x04\b") # '4.8' (version)
    @symbol_links = {}   # Symbol => index in the symbol table
    @object_links = {}   # __id__ => index in the object table
    @encoding_names = {} # encoding name => the one String written for it
  end

  # Appends the record for +current_object+ (type byte plus payload).
  #
  # @param current_object [Object] value to serialize, defaults to the root
  # @return [String] everything written so far, header included
  # @raise [NotImplementedError] when the value matches no supported type
  def write(current_object = object)
    case current_object
    when nil then write_nil
    when true then write_true
    when false then write_false
    when Integer then write_integer(current_object)
    when Symbol then write_symbol_record(current_object)
    else write_linked(current_object)
    end
    data
  end

  # Appends raw bytes to the output. The text is copied as binary first so
  # that multibyte text can follow high bytes without an encoding clash.
  def append(s)
    @data << binary(s)
  end

  def write_nil
    append('0')
  end

  def write_true
    append('T')
  end

  def write_false
    append('F')
  end

  # Writes +n+ in the packed integer encoding (no type byte): 0 is a single
  # zero byte, small values are offset by 5 into one byte, everything else is
  # a signed length byte followed by up to four little-endian bytes.
  #
  # @raise [RangeError] when +n+ needs more than four bytes
  def write_fixnum(n)
    if n == 0
      append(0.chr)
    elsif n > 0 && n < 123
      append((n + 5).chr)
    elsif n < 0 && n > -124
      append((256 + n - 5).chr)
    else
      bytes = String.new
      remaining = n
      4.times do
        bytes << (remaining & 255).chr
        remaining >>= 8
        break if remaining == 0 || remaining == -1
      end
      unless remaining == 0 || remaining == -1
        raise RangeError, "#{n} does not fit in a 4-byte Marshal integer"
      end

      count = bytes.bytesize
      # A negative length byte marks a negative value.
      append((remaining < 0 ? 256 - count : count).chr)
      append(bytes)
    end
  end

  # Writes the byte length followed by the raw bytes (no type byte).
  def write_string(s)
    write_fixnum(s.bytesize)
    append(s)
  end

  # Writes the symbol's name as length-prefixed bytes (no type byte).
  def write_symbol(sym)
    write_string(sym.to_s)
  end

  def write_array(a)
    write_fixnum(a.length)
    a.each { |item| write(item) }
  end

  # Accepts a Hash or an Array of [key, value] pairs.
  def write_hash(hash)
    write_pairs(hash)
  end

  # Floats are stored as text; the format spells the non-finite values
  # "inf", "-inf" and "nan".
  def write_float(f)
    text =
      if f.nan?
        'nan'
      elsif f.infinite?
        f > 0 ? 'inf' : '-inf'
      else
        f.to_s
      end
    write_string(text)
  end

  def write_class(klass)
    write_string(marshal_name(klass))
  end

  def write_module(mod)
    write_string(marshal_name(mod))
  end

  # Writes the struct's class name followed by its member/value pairs.
  def write_struct(struct)
    write(marshal_name(struct.class).to_sym)
    write_pairs(struct.members.zip(struct.values))
  end

  # Writes the object's class name followed by its instance variables.
  def write_object(object)
    write(marshal_name(object.class).to_sym)
    ivars = object.instance_variables.map do |ivar_name|
      [ivar_name, object.instance_variable_get(ivar_name)]
    end
    write_pairs(ivars)
  end

  private

  # Returns a fresh copy of +s+ tagged as binary. Strings without #b are
  # assumed to be byte-oriented already and are only duplicated.
  def binary(s)
    s.respond_to?(:b) ? s.b : s.dup
  end

  # Emits an '@' link and returns true when +obj+ was written before;
  # otherwise reserves the next object-table index for it and returns false.
  # The index is reserved before the payload is written so that children can
  # refer back to their container.
  def linked?(obj)
    key = obj.__id__
    index = @object_links[key]
    if index
      append('@')
      write_fixnum(index)
      true
    else
      @object_links[key] = @object_links.size
      false
    end
  end

  def write_integer(n)
    if n >= FIXNUM_MIN && n <= FIXNUM_MAX
      append('i')
      write_fixnum(n)
    elsif !linked?(n)
      append('l')
      write_bignum(n)
    end
  end

  # Payload of an 'l' record: sign character, number of 16-bit words, then
  # the magnitude as little-endian bytes.
  def write_bignum(n)
    append(n < 0 ? '-' : '+')
    magnitude = n.abs
    bytes = String.new
    while magnitude > 0
      bytes << (magnitude & 255).chr
      magnitude >>= 8
    end
    bytes << 0.chr if bytes.bytesize.odd? # pad to a whole 16-bit word
    write_fixnum(bytes.bytesize / 2)
    append(bytes)
  end

  def write_symbol_record(sym)
    index = @symbol_links[sym]
    if index
      append(';')
      write_fixnum(index)
      return
    end

    @symbol_links[sym] = @symbol_links.size
    name = sym.to_s
    # Only non-ASCII names carry an encoding tag.
    tagged = name.respond_to?(:ascii_only?) && !name.ascii_only?
    ivar = tagged ? encoding_ivar(name) : nil
    append('I') if ivar
    append(':')
    write_symbol(sym)
    write_pairs([ivar]) if ivar
  end

  def write_linked(obj)
    return if linked?(obj)

    case obj
    when String
      write_string_record(obj)
    when Array
      append('[')
      write_array(obj)
    when Hash
      append('{')
      write_hash(obj)
    when Float
      append('f')
      write_float(obj)
    when Class
      append('c')
      write_class(obj)
    when Module
      append('m')
      write_module(obj)
    when Struct
      append('S')
      write_struct(obj)
    when Object
      append('o')
      write_object(obj)
    else
      raise NotImplementedError
    end
  end

  # Strings with a known non-binary encoding are wrapped in an 'I' record
  # whose single instance variable names the encoding.
  def write_string_record(str)
    ivar = encoding_ivar(str)
    append('I') if ivar
    append('"')
    write_string(str)
    write_pairs([ivar]) if ivar
  end

  def write_pairs(pairs)
    write_fixnum(pairs.length)
    pairs.each do |key, value|
      write(key)
      write(value)
    end
  end

  # @return [Array, nil] the [name, value] pair describing +string+'s
  #   encoding, or nil for binary strings and runtimes without encodings
  def encoding_ivar(string)
    return nil unless string.respond_to?(:encoding)

    name = string.encoding.name
    case name
    when 'UTF-8' then [:E, true]
    when 'US-ASCII' then [:E, false]
    when 'ASCII-8BIT', 'BINARY' then nil
    else
      # Reuse one String per encoding so later uses become links and the
      # registered object stays alive for the whole dump.
      [:encoding, (@encoding_names[name] ||= binary(name))]
    end
  end

  def marshal_name(mod)
    name = mod.name
    if name.nil? || name.empty?
      raise TypeError, "can't dump anonymous #{mod.class.name.downcase} #{mod}"
    end

    name
  end
end

# Deserializes Marshal 4.8 data in pure Ruby.
#
# Understands the records nil/true/false, 'i', 'l', ':', ';', '"', 'I', '[',
# '{', 'f', 'c', 'm', 'S', 'o', 'C', 'e' and '@'. Containers are entered into
# the object table before their children are read, so '@' links (including
# links back to an enclosing container) resolve to the right object.
#
# NOTE(review): loading resolves constants by name and allocates instances of
# them; like any Marshal loader this should not be fed untrusted data.
class MarshalReadBuffer
  attr_reader :data, :major_version, :minor_version

  # @param data [String] a complete Marshal dump, version header included
  # @raise [ArgumentError] when the header is missing
  def initialize(data)
    @data = binary(data).split('') # one single-byte String per element
    @major_version = read_byte
    @minor_version = read_byte
    @symbols_cache = []
    @objects_cache = []
  end

  def read_byte
    read_char.ord
  end

  # @raise [ArgumentError] when the input is exhausted
  def read_char
    char = data.shift
    raise ArgumentError, 'marshal data too short' if char.nil?

    char
  end

  # Reads the next record and returns the value it describes.
  #
  # @raise [NotImplementedError] for an unsupported type byte
  def read
    char = read_char
    case char
    when '0' then nil
    when 'T' then true
    when 'F' then false
    when 'i' then read_integer
    when 'l' then read_bignum
    when ':' then read_symbol
    when '"' then read_string
    when 'I' then read_with_ivars
    when '[' then read_array
    when '{' then read_hash
    when 'f' then read_float
    when 'c' then read_class
    when 'm' then read_module
    when 'S' then read_struct
    when 'o' then read_object
    when 'C' then read_userclass
    when 'e' then read_extended_object
    when ';' then read_symbol_link
    when '@' then read_object_link
    else
      raise NotImplementedError, "Unknown object type #{char}"
    end
  end

  # Decodes a packed integer (no type byte).
  def read_integer
    # c is the first byte, reinterpreted as a signed value
    c = (read_byte ^ 128) - 128
    case c
    when 0
      0
    when 5..127
      # small positive numbers are stored offset by 5
      c - 5
    when -128..-5
      # small negative numbers are stored offset by -5
      c + 5
    when 1..4
      # the next c bytes hold a positive little-endian number
      c.times.inject(0) { |result, i| result | (read_byte << (8 * i)) }
    else
      # -4..-1: the next -c bytes replace the low bytes of -1
      (-c).times.inject(-1) do |result, i|
        (result & ~(0xff << (8 * i))) | (read_byte << (8 * i))
      end
    end
  end

  # Runs the block, stores its result in the object table and returns it.
  # Only suitable for values without children, because the index is taken
  # after the block has finished.
  def cache_object(&block)
    object = block.call
    @objects_cache << object
    object
  end

  def read_symbol
    symbol = read_integer.times.map { read_char }.join.to_sym
    @symbols_cache << symbol
    symbol
  end

  # @param cache [Boolean] whether the string takes an object-table index
  def read_string(cache: true)
    string = read_integer.times.map { read_char }.join
    @objects_cache << string if cache
    string
  end

  def read_array
    array = []
    @objects_cache << array # registered first so elements can link to it
    read_integer.times { array << read }
    array
  end

  # @param cache [Boolean] whether the hash takes an object-table index
  def read_hash(cache: true)
    hash = {}
    @objects_cache << hash if cache
    read_integer.times do
      key = read
      hash[key] = read
    end
    hash
  end

  def read_float
    cache_object do
      text = read_string(cache: false)
      case text
      when 'inf' then Float::INFINITY
      when '-inf' then -Float::INFINITY
      when 'nan' then Float::NAN
      else text.to_f
      end
    end
  end

  # @raise [ArgumentError] when the constant does not exist
  def marshal_const_get(const_name)
    Object.const_get(const_name)
  rescue NameError
    raise ArgumentError, "undefined class/module #{const_name}"
  end

  def read_class
    cache_object do
      # Only the class itself occupies an object-table index, not its name.
      const_name = read_string(cache: false)
      klass = marshal_const_get(const_name)
      unless klass.instance_of?(Class)
        raise ArgumentError, "#{const_name} does not refer to a Class"
      end

      klass
    end
  end

  def read_module
    cache_object do
      const_name = read_string(cache: false)
      klass = marshal_const_get(const_name)
      unless klass.instance_of?(Module)
        raise ArgumentError, "#{const_name} does not refer to a Module"
      end

      klass
    end
  end

  def read_struct
    klass = marshal_const_get(read)
    struct = klass.allocate
    @objects_cache << struct
    read_hash(cache: false).each { |member, value| struct[member] = value }
    struct
  end

  def read_object
    klass = marshal_const_get(read)
    object = klass.allocate
    @objects_cache << object
    read_hash(cache: false).each do |ivar_name, value|
      object.instance_variable_set(ivar_name, value)
    end
    object
  end

  # 'C' record: a String/Array/Hash payload that belongs to a subclass. The
  # wrapped instance takes over the payload's object-table index.
  def read_userclass
    klass = marshal_const_get(read)
    slot = @objects_cache.length
    payload = read
    instance =
      if payload.respond_to?(:replace)
        klass.allocate.replace(payload)
      else
        klass.new(payload)
      end
    @objects_cache[slot] = instance
    instance
  end

  # 'e' record: the wrapped object registers itself, the wrapper does not.
  def read_extended_object
    mod = marshal_const_get(read)
    read.extend(mod)
  end

  # @raise [ArgumentError] when the index was never registered
  def read_symbol_link
    index = read_integer
    unless index >= 0 && index < @symbols_cache.length
      raise ArgumentError, 'bad symbol'
    end

    @symbols_cache[index]
  end

  # @raise [ArgumentError] when the index was never registered
  def read_object_link
    index = read_integer
    unless index >= 0 && index < @objects_cache.length
      raise ArgumentError, 'dump format error (unlinked)'
    end

    @objects_cache[index]
  end

  private

  # Returns a fresh copy of +s+ tagged as binary so it splits into bytes.
  # Strings without #b are assumed to be byte-oriented already.
  def binary(s)
    s.respond_to?(:b) ? s.b : s.dup
  end

  # 'l' record: sign character, number of 16-bit words, little-endian bytes.
  def read_bignum
    cache_object do
      sign = read_char
      magnitude = 0
      (read_integer * 2).times { |i| magnitude |= read_byte << (8 * i) }
      sign == '-' ? -magnitude : magnitude
    end
  end

  # 'I' record: a value followed by its instance variables. The :E and
  # :encoding entries describe the text encoding instead of real variables.
  def read_with_ivars
    symbol_slot = @symbols_cache.length
    object = read
    encoding = nil
    read_integer.times do
      name = read
      value = read
      case name
      when :E then encoding = value ? 'UTF-8' : 'US-ASCII'
      when :encoding then encoding = value
      else object.instance_variable_set(name, value)
      end
    end
    encoding ? retag(object, encoding, symbol_slot) : object
  end

  # Applies +encoding+ to a String in place, or re-interns a Symbol under
  # that encoding and updates its symbol-table entry. Runtimes without
  # String#force_encoding return the value unchanged.
  def retag(object, encoding, symbol_slot)
    case object
    when String
      object.force_encoding(encoding) if object.respond_to?(:force_encoding)
      object
    when Symbol
      name = object.to_s.dup
      return object unless name.respond_to?(:force_encoding)

      @symbols_cache[symbol_slot] = name.force_encoding(encoding).to_sym
    else
      object
    end
  end
end
# Entry points mirroring the Marshal.dump / Marshal.load API on top of the
# pure-Ruby buffers.
#
# Declared as a module because Marshal is a Module on runtimes that ship it;
# reopening it with `class` raises TypeError there. Each method is defined
# only when the runtime does not already provide it, so a native
# implementation is never replaced.
module Marshal
  unless respond_to?(:dump)
    # @param o [Object] value to serialize
    # @return [String] the Marshal-formatted bytes
    def self.dump(o)
      MarshalWriteBuffer.new(o).write
    end
  end

  unless respond_to?(:load)
    # @param o [String] Marshal-formatted bytes
    # @return [Object] the reconstructed value
    def self.load(o)
      MarshalReadBuffer.new(o).read
    end
  end
end
# Demo: two hashes that reference each other are dumped and loaded again;
# the serialized bytes and the reloaded structure are printed.
outer = { x: 100, y: 100 }
inner = { w: 10, y: 10 }
outer[:b] = inner
inner[:a] = outer

serialized = Marshal.dump(outer)
restored = Marshal.load(serialized)

puts serialized
puts restored
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment