opal/corelib/marshal/read_buffer.rb
# backtick_javascript: true
# https://github.com/ruby/ruby/blob/trunk/doc/marshal.rdoc
# https://github.com/ruby/ruby/blob/trunk/marshal.c
module ::Marshal
class self::ReadBuffer
%x{
  // Returns an array with the char code (as reported by charCodeAt) of
  // every position of the given JS string, in order.
  function stringToBytes(string) {
    var codes = [],
        position = 0,
        total = string.length;

    while (position < total) {
      codes.push(string.charCodeAt(position));
      position += 1;
    }

    return codes;
  }
}
# Read-only accessors for the "major.minor" version string, the array of
# input char codes, the current read position and the two link caches
# (objects and symbols) that are filled while reading.
attr_reader :version, :buffer, :index, :object_cache, :symbols_cache
# Builds a reader over +input+ (converted with #to_s) and consumes the
# two-byte version header.
#
# Raises TypeError when the header does not match MAJOR_VERSION and
# MINOR_VERSION.
def initialize(input)
  @buffer = `stringToBytes(#{input.to_s})`
  @index = 0

  # The header is the first two bytes: major first, then minor.
  major, minor = read_byte, read_byte
  unless major == MAJOR_VERSION && minor == MINOR_VERSION
    ::Kernel.raise ::TypeError, "incompatible marshal file format (can't be read)"
  end

  @version = "#{major}.#{minor}"
  @object_cache = []
  @symbols_cache = []
  @ivars = []
end
# Number of entries in the underlying buffer.
def length
  @buffer.size
end
# Reads the next object from the buffer.
#
# One type-tag character is consumed and the matching reader is invoked.
# The +cache+ keyword is accepted but is not consulted by this method.
#
# Raises NotImplementedError for the 'M' and 'd' tags and ArgumentError
# for any tag that is not listed below.
def read(cache: true)
  case read_char
  when '0' then nil
  when 'T' then true
  when 'F' then false
  when 'i' then read_fixnum
  when 'f' then read_float
  when 'l' then read_bignum
  when '"' then read_string
  when ':' then read_symbol
  when ';' then read_cached_symbol
  when '[' then read_array
  when '{' then read_hash
  when '}' then read_hashdef
  when '/' then read_regexp
  when 'S' then read_struct
  when 'c' then read_class
  when 'm' then read_module
  when 'o' then read_object
  when '@' then read_cached_object
  when 'e' then read_extended_object
  when 'I' then read_primitive_with_ivars
  when 'C' then read_user_class
  when 'u' then read_user_defined
  when 'U' then read_user_marshal
  when 'M'
    # Would be handled by a read_module_old reader.
    ::Kernel.raise ::NotImplementedError, 'ModuleOld type cannot be demarshaled yet'
  when 'd'
    ::Kernel.raise ::NotImplementedError, 'Data type cannot be demarshaled'
  else
    ::Kernel.raise ::ArgumentError, 'dump format error'
  end
end
# Returns the buffer entry at the current position and advances by one.
#
# Raises ArgumentError when the position is already at or past the end.
def read_byte
  ::Kernel.raise ::ArgumentError, 'marshal data too short' if @index >= length

  byte = @buffer[@index]
  @index += 1
  byte
end
# Reads the next byte and returns the one-character string for that code.
def read_char
  code = read_byte
  `String.fromCharCode(code)`
end
# Reads and returns a fixnum from an input stream
#
def read_fixnum
# The first byte is reinterpreted as a signed 8-bit value c
# (the xor/subtract pair below maps 128..255 to -128..-1):
#   c == 0     -> the result is 0
#   5..127     -> the result is c - 5
#   -128..-5   -> the result is c + 5
#   1..4       -> c more bytes follow, least significant first, and are
#                 OR-ed together
#   -4..-1     -> -c more bytes follow, least significant first; each one
#                 replaces the matching byte of an initial -1, so the
#                 untouched high bits stay set
%x{
var x, i, c = (#{read_byte} ^ 128) - 128;
if (c === 0) {
return 0;
}
if (c > 0) {
if (4 < c && c < 128) {
return c - 5;
}
x = 0;
for (i = 0; i < c; i++) {
x |= (#{read_byte} << (8*i));
}
} else {
if (-129 < c && c < -4) {
return c + 5;
}
c = -c;
x = -1;
for (i = 0; i < c; i++) {
x &= ~(0xff << (8*i));
x |= (#{read_byte} << (8*i));
}
}
return x;
}
end
# Reads and returns Float from an input stream
#
# @example
# 123.456
# Is encoded as
# 'f', '123.456'
#
def read_float
  # The textual form is read without registering it in the object cache;
  # only the resulting number is cached.
  text = read_string(cache: false)
  value = case text
          when 'nan' then 0.0 / 0
          when 'inf' then 1.0 / 0
          when '-inf' then -1.0 / 0
          else text.to_f
          end
  @object_cache << value
  value
end
# Reads and returns Bignum from an input stream
#
def read_bignum
  # Layout: sign character, then a fixnum length counted in 2-byte units,
  # then the magnitude bytes with the least significant byte first.
  sign = if read_char == '-'
           -1
         else
           1
         end
  byte_count = read_fixnum * 2

  magnitude = 0
  byte_count.times do |position|
    magnitude += read_char.ord * 2**(position * 8)
  end

  value = magnitude.to_i * sign
  @object_cache << value
  value
end
# Reads and returns a string from an input stream
# Sometimes string shouldn't be cached using
# an internal object cache, for a:
# + class/module name
# + string representation of float
# + string representation of regexp
#
def read_string(cache: true)
  length = read_fixnum
  %x{
    // Collect one character per announced position, then join them.
    var chars = [];
    while (chars.length < length) {
      chars.push(#{read_char});
    }
    var result = chars.join('');

    if (cache) {
      self.object_cache.push(result);
    }
    return result;
  }
end
# Reads and returns a symbol from an input stream
#
def read_symbol
  length = read_fixnum
  %x{
    // Read the announced number of characters, then remember the name so
    // later symbol links can refer to it by position.
    var name = '',
        remaining = length;
    while (remaining > 0) {
      name += #{read_char};
      remaining -= 1;
    }
    self.symbols_cache.push(name);
    return name;
  }
end
# Reads a symbol that was previously cached by its link
#
# @example
# [:a, :a, :b, :b, :c, :c]
# Is encoded as
# '[', 6, :a, ;0, :b, ;1, :c, ;2
#
# Resolves a symbol link: reads the link index and returns the symbol that
# was stored at that position of the symbols cache.
#
# Raises ArgumentError when the index does not refer to an already read
# symbol.
def read_cached_symbol
  link = read_fixnum
  # Without this check a dangling link would quietly produce nil, and a
  # negative one would wrap around to an unrelated entry at the end.
  if link < 0 || link >= symbols_cache.length
    ::Kernel.raise ::ArgumentError, 'bad symbol'
  end
  symbols_cache[link]
end
# Reads and returns an array from an input stream
#
# @example
# [100, 200, 300]
# is encoded as
# '[', 3, 100, 200, 300
#
def read_array
  elements = []
  # The array is registered before its elements are read, so it takes its
  # cache slot ahead of anything nested inside it.
  @object_cache << elements
  count = read_fixnum
  elements.push(read) while elements.length < count
  elements
end
# Reads and returns a hash from an input stream
# Sometimes hash shouldn't be cached using
# an internal object cache, for a:
# + hash of instance variables
# + hash of struct attributes
#
# @example
# {100 => 200, 300 => 400}
# is encoded as
# '{', 2, 100, 200, 300, 400
#
def read_hash(cache: true)
  result = {}
  @object_cache << result if cache

  pair_count = read_fixnum
  pair_count.times do
    # Each pair is stored key first, value second.
    key = read
    result[key] = read
  end
  result
end
# Reads and returns a hash with default value
#
# @example
# Hash.new(:default).merge(100 => 200)
# is encoded as
# '}', 1, 100, 200, :default
#
def read_hashdef
  # The pairs come first; the default value is the object that follows them.
  read_hash.tap { |hash| hash.default = read }
end
# Reads and returns Regexp from an input stream
#
# @example
# r = /regexp/mix
# is encoded as
# '/', 'regexp', r.options.chr
#
def read_regexp
  # The source text is not cached on its own; only the Regexp built from
  # it (together with the option byte that follows) is.
  source = read_string(cache: false)
  flags = read_byte
  pattern = ::Regexp.new(source, flags)
  @object_cache << pattern
  pattern
end
# Reads and returns a Struct from an input stream
#
# @example
# Point = Struct.new(:x, :y)
# Point.new(100, 200)
# is encoded as
# 'S', :Point, {:x => 100, :y => 200}
#
def read_struct
  struct_class = safe_const_get(read(cache: false))
  member_values = read_hash(cache: false)
  # Pass the values in the order in which the struct declares its members.
  ordered = member_values.values_at(*struct_class.members)
  instance = struct_class.new(*ordered)
  @object_cache << instance
  instance
end
# Reads and returns a Class from an input stream
#
# @example
# String
# is encoded as
# 'c', 'String'
#
def read_class
  name = read_string(cache: false)
  constant = safe_const_get(name)
  # The constant has to be exactly a Class (a module is rejected).
  ::Kernel.raise ::ArgumentError, "#{name} does not refer to a Class" unless constant.class == ::Class

  @object_cache << constant
  constant
end
# Reads and returns a Module from an input stream
#
# @example
# Kernel
# is encoded as
# 'm', 'Kernel'
#
def read_module
  name = read_string(cache: false)
  constant = safe_const_get(name)
  # The constant has to be exactly a Module (a class is rejected).
  ::Kernel.raise ::ArgumentError, "#{name} does not refer to a Module" unless constant.class == ::Module

  @object_cache << constant
  constant
end
# Reads and returns an abstract object from an input stream
#
# @example
# obj = Object.new
# obj.instance_variable_set(:@ivar, 100)
# obj
# is encoded as
# 'o', :Object, {:@ivar => 100}
#
# The only exception is the Range class (and its subclasses).
# For some reason, in MRI instances of this class have instance variables
# - begin
# - end
# - excl
# without the '@' prefix.
#
def read_object
  klass = safe_const_get(read(cache: false))
  object = klass.allocate
  # The object is cached before its instance variables are read.
  @object_cache << object

  read_hash(cache: false).each do |name, value|
    if name[0] == '@'
      object.instance_variable_set(name, value)
    else
      # MRI allows an object to have ivars whose names do not start with '@'
      # https://github.com/ruby/ruby/blob/ab3a40c1031ff3a0535f6bcf26de40de37dbb1db/range.c#L1225
      `object[name] = value`
    end
  end

  object
end
# Reads an object that was cached previously by its link
#
# @example
# obj1 = Object.new
# obj2 = Object.new
# obj3 = Object.new
# [obj1, obj1, obj2, obj2, obj3, obj3]
# is encoded as
# [obj1, @1, obj2, @2, obj3, @3]
#
# NOTE: the array itself is cached as @0, that's why obj1 is cached as @1, obj2 as @2, etc.
#
# Resolves an object link: reads the link index and returns the entry that
# was stored at that position of the object cache.
#
# Raises ArgumentError when the index does not refer to an already cached
# entry.
def read_cached_object
  link = read_fixnum
  # Without this check a dangling link would quietly produce nil, and a
  # negative one would wrap around to an unrelated entry at the end.
  if link < 0 || link >= object_cache.length
    ::Kernel.raise ::ArgumentError, 'dump format error (unlinked)'
  end
  object_cache[link]
end
# Reads an object that was dynamically extended before marshaling like
#
# @example
# M1 = Module.new
# M2 = Module.new
# obj = Object.new
# obj.extend(M1)
# obj.extend(M2)
# obj
# is encoded as
# 'e', :M2, :M1, obj
#
def read_extended_object
  # The module name comes first, the object to extend follows it.
  extension = safe_const_get(read)
  read.tap { |target| target.extend(extension) }
end
# Reads a primitive object with instance variables
# (classes that have their own marshalling rules, like Array/Hash/Regexp/etc)
#
# @example
# arr = [100, 200, 300]
# arr.instance_variable_set(:@ivar, :value)
# arr
# is encoded as
# 'I', [100, 200, 300], {:@ivar => value}
#
def read_primitive_with_ivars
  object = read
  primitive_ivars = read_hash(cache: false)

  # Strings are boxed into a JS String object before ivars are attached
  # (presumably because a primitive JS string cannot hold properties).
  if primitive_ivars.any? && object.is_a?(::String)
    object = `new String(object)`
  end

  primitive_ivars.each do |name, value|
    # 'E' and 'encoding' are the markers MRI uses to record a string's
    # encoding. They are not '@'-prefixed instance variable names, so
    # handing them to instance_variable_set would raise NameError.
    next if name == 'E' || name == 'encoding'

    object.instance_variable_set(name, value)
  end

  object
end
# Reads a User Class (instance of String/Regexp/Array/Hash subclass)
#
# @example
# UserArray = Class.new(Array)
# UserArray[100, 200, 300]
# is encoded as
# 'C', :UserArray, [100, 200, 300]
#
def read_user_class
  klass = safe_const_get(read(cache: false))
  # NOTE(review): `read` accepts `cache:` but does not consult it, so a
  # wrapped value whose reader caches by default also lands in the object
  # cache, ahead of the wrapper pushed below. Confirm this matches the link
  # numbering the writer produces.
  value = read(cache: false)

  # Hash subclasses are built through `klass[]`, everything else via `new`.
  result = klass < Hash ? klass[value] : klass.new(value)
  @object_cache << result
  result
end
# Reads a 'User Defined' object that has '_dump/self._load' methods
#
# @example
# class UserDefined
# def _dump(level)
# '_dumped'
# end
# end
#
# UserDefined.new
# is encoded as
# 'u', :UserDefined, '_dumped'
#
# To load it back, `UserDefined._load` must be used.
#
def read_user_defined
  klass = safe_const_get(read(cache: false))
  # The payload is the raw string that the class's own `_load` understands.
  payload = read_string(cache: false)
  klass._load(payload).tap { |loaded| @object_cache << loaded }
end
# Reads a 'User Marshal' object that has 'marshal_dump/marshal_load' methods
#
# @example
# class UserMarshal < Struct.new(:a, :b)
# def marshal_dump
# [a, b]
# end
#
# def marshal_load(data)
# self.a, self.b = data
# end
# end
#
# UserMarshal.new(100, 200)
# is encoded as
# 'U', :UserMarshal, [100, 200]
#
# To load it back `UserMarshal.allocate` and `UserMarshal#marshal_load` must be called
#
def read_user_marshal
  instance = safe_const_get(read(cache: false)).allocate
  # The blank instance is cached before its payload is read and applied.
  @object_cache << instance
  instance.marshal_load(read(cache: false))
  instance
end
# Returns a constant by passed const_name,
# re-raises Marshal-specific error when it's missing
#
def safe_const_get(const_name)
  begin
    ::Object.const_get(const_name)
  rescue ::NameError
    # Lookup failures are reported as ArgumentError instead of NameError.
    ::Kernel.raise ::ArgumentError, "undefined class/module #{const_name}"
  end
end
end
end