opal/corelib/marshal/read_buffer.rb

Summary

Maintainability
A
2 hrs
Test Coverage
# backtick_javascript: true

# https://github.com/ruby/ruby/blob/trunk/doc/marshal.rdoc
# https://github.com/ruby/ruby/blob/trunk/marshal.c

module ::Marshal
  class self::ReadBuffer
    # JavaScript helper: turns a string into an array holding the char code
    # (String.prototype.charCodeAt) of every character, in order.
    %x{
      function stringToBytes(string) {
        var bytes = [],
            size = string.length,
            pos;

        for (pos = 0; pos < size; pos++) {
          bytes.push(string.charCodeAt(pos));
        }
        return bytes;
      }
    }

    # Read-only access to the reader state: the version string built in
    # #initialize, the decoded buffer, the current read position and the two
    # link caches (objects and symbols).
    attr_reader :version, :buffer, :index, :object_cache, :symbols_cache

    # Builds a reader over marshalled data and validates the version header.
    #
    # @param input [#to_s] marshalled data; it is converted with #to_s and
    #   split into an array of char codes
    # @raise [TypeError] when the first two entries differ from
    #   MAJOR_VERSION / MINOR_VERSION
    # @raise [ArgumentError] when the input holds fewer than two entries
    #   (raised by #read_byte)
    def initialize(input)
      source = input.to_s
      @buffer = `stringToBytes(source)`
      @index = 0

      # The stream starts with the major version followed by the minor one.
      major = read_byte
      minor = read_byte
      unless major == MAJOR_VERSION && minor == MINOR_VERSION
        ::Kernel.raise ::TypeError, "incompatible marshal file format (can't be read)"
      end

      @version = "#{major}.#{minor}"
      @object_cache = []
      @symbols_cache = []
      # NOTE(review): nothing in the visible part of this class reads @ivars.
      @ivars = []
    end

    # @return [Integer] number of entries held by the decoded buffer
    def length
      @buffer.size
    end

    # Reads the next object from the stream by dispatching on its type tag.
    #
    # @param cache [Boolean] accepted for the callers that pass it, but this
    #   method never consults it; every decoder below applies its own caching
    # @return [Object] the decoded value
    # @raise [NotImplementedError] for the 'M' and 'd' tags
    # @raise [ArgumentError] for any tag that is not listed below
    def read(cache: true)
      # The first character indicates the type of the object
      case read_char
      when '0' then nil
      when 'T' then true
      when 'F' then false
      when 'i' then read_fixnum
      when 'f' then read_float
      when 'l' then read_bignum
      when '"' then read_string
      when ':' then read_symbol
      when ';' then read_cached_symbol
      when '[' then read_array
      when '{' then read_hash
      when '}' then read_hashdef
      when '/' then read_regexp
      when 'S' then read_struct
      when 'c' then read_class
      when 'm' then read_module
      when 'o' then read_object
      when '@' then read_cached_object
      when 'e' then read_extended_object
      when 'I' then read_primitive_with_ivars
      when 'C' then read_user_class
      when 'u' then read_user_defined
      when 'U' then read_user_marshal
      when 'M'
        # read_module_old
        ::Kernel.raise ::NotImplementedError, 'ModuleOld type cannot be demarshaled yet'
      when 'd'
        ::Kernel.raise ::NotImplementedError, 'Data type cannot be demarshaled'
      else
        ::Kernel.raise ::ArgumentError, 'dump format error'
      end
    end

    # Consumes one entry of the buffer and advances the read position.
    #
    # @return [Integer] the entry stored at the current index
    # @raise [ArgumentError] when the index already points past the last entry
    def read_byte
      ::Kernel.raise ::ArgumentError, 'marshal data too short' if @index >= length

      byte = @buffer[@index]
      @index += 1
      byte
    end

    # Consumes one entry of the buffer and returns it as a one-character string.
    #
    # @return [String] result of JavaScript's String.fromCharCode for that entry
    def read_char
      code = read_byte
      `String.fromCharCode(code)`
    end

    # Reads and returns a fixnum from an input stream
    #
    # The first byte is reinterpreted as a signed value `c` (-128..127) and
    # selects the layout of the number:
    #   c == 0         the number is 0
    #   5 <= c <= 127  the number is c - 5
    #   1 <= c <= 4    the next c bytes hold the number, least significant first
    #   -128..-5       the number is c + 5
    #   -4..-1         the next -c bytes hold the low bytes of a negative number
    #
    # @return [Integer]
    # @raise [ArgumentError] when the buffer ends early (raised by #read_byte)
    def read_fixnum
      # `(byte ^ 128) - 128` maps an unsigned byte (0..255) onto -128..127.
      # In the negative multi-byte branch the accumulator starts at -1 (all
      # bits set); each pass clears one byte of it and ORs in the byte that was
      # read, so the bytes that are not overwritten stay filled with ones.
      %x{
        var x, i, c = (#{read_byte} ^ 128) - 128;
        if (c === 0) {
          return 0;
        }

        if (c > 0) {
          if (4 < c && c < 128) {
            return c - 5;
          }
          x = 0;
          for (i = 0; i < c; i++) {
            x |= (#{read_byte} << (8*i));
          }
        } else {
          if (-129 < c && c < -4) {
            return c + 5;
          }

          c = -c;
          x = -1;

          for (i = 0; i < c; i++) {
            x &= ~(0xff << (8*i));
            x |= (#{read_byte} << (8*i));
          }
        }

        return x;
      }
    end

    # Reads and returns Float from an input stream
    #
    # @example
    #   123.456
    # Is encoded as
    #   'f', '123.456'
    #
    # The textual form is read without caching it; only the resulting number
    # is appended to the object cache.
    #
    # @return [Float] NaN for 'nan', +/-Infinity for 'inf' / '-inf', otherwise
    #   the result of String#to_f on the text
    def read_float
      text = read_string(cache: false)
      number = case text
               when 'nan'  then 0.0 / 0
               when 'inf'  then 1.0 / 0
               when '-inf' then -1.0 / 0
               else text.to_f
               end
      @object_cache << number
      number
    end

    # Reads and returns Bignum from an input stream
    #
    # Layout: a sign character ('-' means negative, anything else positive), a
    # fixnum holding half the number of payload bytes, then the payload bytes
    # with the least significant byte first.
    #
    # @return [Integer] the decoded number; it is also appended to the object cache
    def read_bignum
      sign = read_char == '-' ? -1 : 1
      byte_count = read_fixnum * 2

      magnitude = 0
      byte_count.times do |position|
        magnitude += read_byte * 2**(position * 8)
      end

      number = magnitude.to_i * sign
      @object_cache << number
      number
    end

    # Reads and returns a string from an input stream
    # Sometimes string shouldn't be cached using
    # an internal object cache, for a:
    #  + class/module name
    #  + string representation of float
    #  + string representation of regexp
    #
    # @param cache [Boolean] when true the string is appended to the object
    #   cache (the check happens in JavaScript, so JS truthiness applies)
    # @return [String] the next `length` characters, where `length` is the
    #   fixnum that precedes them in the stream
    def read_string(cache: true)
      length = read_fixnum
      %x{
        var text = '', remaining = length;

        while (remaining-- > 0) {
          text += #{read_char};
        }

        if (cache) {
          self.object_cache.push(text);
        }

        return text;
      }
    end

    # Reads and returns a symbol from an input stream
    #
    # The symbol is always appended to the symbols cache so that later symbol
    # links (see #read_cached_symbol) can refer to it by position.
    #
    # @return [String] the next `length` characters, where `length` is the
    #   fixnum that precedes them in the stream
    def read_symbol
      length = read_fixnum
      %x{
        var sym = '', remaining = length;

        while (remaining-- > 0) {
          sym += #{read_char};
        }

        self.symbols_cache.push(sym);

        return sym;
      }
    end

    # Reads a symbol that was previously cached by its link
    #
    # @example
    #   [:a, :a, :b, :b, :c, :c]
    # Is encoded as
    #   '[', 6, :a, ;0, :b, ;1, :c, ;2
    #
    # @return [Object] the entry of the symbols cache the link points at
    # @raise [ArgumentError] when the link does not refer to a symbol that has
    #   already been read
    def read_cached_symbol
      link = read_fixnum
      # A plain Array#[] lookup would return nil for a link past the end and
      # count from the back for a negative one, hiding corrupt input.
      if link < 0 || link >= symbols_cache.length
        ::Kernel.raise ::ArgumentError, 'bad symbol'
      end

      symbols_cache[link]
    end

    # Reads and returns an array from an input stream
    #
    # @example
    #   [100, 200, 300]
    # is encoded as
    #   '[', 3, 100, 200, 300
    #
    # The array is appended to the object cache before its elements are read,
    # so it takes a cache position ahead of anything nested inside it.
    #
    # @return [Array] the array filled with `count` decoded elements, where
    #   `count` is the fixnum that precedes them in the stream
    def read_array
      items = []
      @object_cache << items
      count = read_fixnum
      %x{
        while (items.length < count) {
          items.push(#{read});
        }

        return items;
      }
    end

    # Reads and returns a hash from an input stream
    # Sometimes hash shouldn't be cached using
    # an internal object cache, for a:
    #  + hash of instance variables
    #  + hash of struct attributes
    #
    # @example
    #   {100 => 200, 300 => 400}
    # is encoded as
    #   '{', 2, 100, 200, 300, 400
    #
    # @param cache [Boolean] when truthy the hash is appended to the object
    #   cache before any of its pairs is read
    # @return [Hash] hash holding the decoded key/value pairs; the number of
    #   pairs is the fixnum that precedes them in the stream
    def read_hash(cache: true)
      hash = {}
      @object_cache << hash if cache

      pair_count = read_fixnum
      pair_count.times do
        # Each pair is stored as the key immediately followed by its value.
        key = read
        hash[key] = read
      end

      hash
    end

    # Reads and returns a hash with default value
    #
    # @example
    #   Hash.new(:default).merge(100 => 200)
    # is encoded as
    #   '}', 1, 100, 200, :default
    #
    # The pairs come first (see #read_hash); the default value follows them.
    #
    # @return [Hash] the decoded hash with its default value assigned
    def read_hashdef
      read_hash.tap { |hash| hash.default = read }
    end

    # Reads and returns Regexp from an input stream
    #
    # @example
    #   r = /regexp/mix
    # is encoded as
    #   '/', 'regexp', r.options.chr
    #
    # The source text is read without caching it; only the Regexp built from
    # it is appended to the object cache.
    #
    # @return [Regexp] built from the source string and the options byte that
    #   follows it
    def read_regexp
      source = read_string(cache: false)
      flags = read_byte

      ::Regexp.new(source, flags).tap { |regexp| @object_cache << regexp }
    end

    # Reads and returns a Struct from an input stream
    #
    # @example
    #   Point = Struct.new(:x, :y)
    #   Point.new(100, 200)
    # is encoded as
    #   'S', :Point, {:x => 100, :y => 200}
    #
    # The struct takes its object-cache position before its members are read,
    # matching the order in which objects appear in the stream. Members that
    # are cached themselves therefore get the positions after the struct, and
    # object links that follow resolve to the right entries.
    #
    # @return [Struct] instance of the named struct class, built from the
    #   attribute values in the order given by the class's `members`
    # @raise [ArgumentError] when the class name cannot be resolved
    #   (raised by #safe_const_get)
    def read_struct
      klass_name = read(cache: false)
      klass = safe_const_get(klass_name)

      # Reserve the slot now; the instance can only be built after its
      # attributes have been read.
      slot = @object_cache.length
      @object_cache << nil

      attributes = read_hash(cache: false)
      args = attributes.values_at(*klass.members)
      result = klass.new(*args)

      @object_cache[slot] = result
      result
    end

    # Reads and returns a Class from an input stream
    #
    # @example
    #   String
    # is encoded as
    #   'c', 'String'
    #
    # The name is read without caching it; the resolved class is appended to
    # the object cache.
    #
    # @return [Class] the constant the name resolves to
    # @raise [ArgumentError] when the name cannot be resolved or resolves to
    #   something whose class is not exactly Class
    def read_class
      klass_name = read_string(cache: false)
      klass = safe_const_get(klass_name)

      unless klass.instance_of?(::Class)
        ::Kernel.raise ::ArgumentError, "#{klass_name} does not refer to a Class"
      end

      @object_cache << klass
      klass
    end

    # Reads and returns a Module from an input stream
    #
    # @example
    #   Kernel
    # is encoded as
    #   'm', 'Kernel'
    #
    # The name is read without caching it; the resolved module is appended to
    # the object cache.
    #
    # @return [Module] the constant the name resolves to
    # @raise [ArgumentError] when the name cannot be resolved or resolves to
    #   something whose class is not exactly Module
    def read_module
      mod_name = read_string(cache: false)
      mod = safe_const_get(mod_name)

      unless mod.instance_of?(::Module)
        ::Kernel.raise ::ArgumentError, "#{mod_name} does not refer to a Module"
      end

      @object_cache << mod
      mod
    end

    # Reads and returns an abstract object from an input stream
    #
    # @example
    #   obj = Object.new
    #   obj.instance_variable_set(:@ivar, 100)
    #   obj
    # is encoded as
    #   'o', :Object, {:@ivar => 100}
    #
    # The only exception is the Range class (and its subclasses).
    # For some reason in MRI instances of this class have instance variables
    # - begin
    # - end
    # - excl
    # without the '@' prefix.
    #
    # @return [Object] a freshly allocated instance of the named class with
    #   the decoded instance variables assigned
    # @raise [ArgumentError] when the class name cannot be resolved
    #   (raised by #safe_const_get)
    def read_object
      klass = safe_const_get(read(cache: false))

      # The instance is cached before its ivars are read, so it takes a cache
      # position ahead of any object stored inside it.
      object = klass.allocate
      @object_cache << object

      read_hash(cache: false).each do |name, value|
        if name[0] == '@'
          object.instance_variable_set(name, value)
        else
          # MRI allows an object to have ivars that do not start from '@'
          # https://github.com/ruby/ruby/blob/ab3a40c1031ff3a0535f6bcf26de40de37dbb1db/range.c#L1225
          # Such names are written straight onto the underlying JS object.
          `object[name] = value`
        end
      end

      object
    end

    # Reads an object that was cached previously by its link
    #
    # @example
    #   obj1 = Object.new
    #   obj2 = Object.new
    #   obj3 = Object.new
    #   [obj1, obj1, obj2, obj2, obj3, obj3]
    # is encoded as
    #   [obj1, @1, obj2, @2, obj3, @3]
    #
    # NOTE: the array itself is cached as @0, that's why obj1 is cached as @1, obj2 as @2, etc.
    #
    # @return [Object] the entry of the object cache the link points at
    # @raise [ArgumentError] when the link does not refer to an object that
    #   has already been registered
    def read_cached_object
      link = read_fixnum
      # A plain Array#[] lookup would return nil for a link past the end and
      # count from the back for a negative one, hiding corrupt input.
      if link < 0 || link >= object_cache.length
        ::Kernel.raise ::ArgumentError, 'dump format error (unlinked)'
      end

      object_cache[link]
    end

    # Reads an object that was dynamically extended before marshaling like
    #
    # @example
    #   M1 = Module.new
    #   M2 = Module.new
    #   obj = Object.new
    #   obj.extend(M1)
    #   obj.extend(M2)
    #   obj
    # is encoded as
    #   'e', :M2, :M1, obj
    #
    # The module name is read first, then the object that it extends.
    #
    # @return [Object] the decoded object after it has been extended
    # @raise [ArgumentError] when the module name cannot be resolved
    #   (raised by #safe_const_get)
    def read_extended_object
      extension = safe_const_get(read)
      read.tap { |object| object.extend(extension) }
    end

    # Reads a primitive object with instance variables
    # (classes that have their own marshalling rules, like Array/Hash/Regexp/etc)
    #
    # @example
    #   arr = [100, 200, 300]
    #   arr.instance_variable_set(:@ivar, :value)
    #   arr
    # is encoded as
    #   'I', [100, 200, 300], {:@ivar => value}
    #
    # @return [Object] the decoded primitive with its instance variables
    #   assigned; a String is wrapped with `new String(...)` first whenever the
    #   ivar hash is not empty
    def read_primitive_with_ivars
      object = read

      primitive_ivars = read_hash(cache: false)

      if primitive_ivars.any? && object.is_a?(::String)
        # A JS string primitive cannot carry properties, so it is wrapped
        # before instance variables are written onto it.
        object = `new String(object)`
      end

      primitive_ivars.each do |name, value|
        # 'E' and 'encoding' are the entries MRI uses to record a String's
        # encoding. They are not real instance variables, and passing them to
        # instance_variable_set would raise a NameError because they lack '@'.
        next if name == 'E' || name == 'encoding'

        object.instance_variable_set(name, value)
      end

      object
    end

    # Reads a User Class (instance of a String/Regexp/Array/Hash subclass)
    #
    # @example
    #   UserArray = Class.new(Array)
    #   UserArray[100, 200, 300]
    # is encoded as
    #   'C', :UserArray, [100, 200, 300]
    #
    # The stream holds one object here: the wrapped primitive re-classed as
    # the user class. The decoder for the primitive registers it in the object
    # cache, so the user-class instance replaces that entry rather than being
    # appended as a second one; otherwise every later object link would be
    # shifted by one.
    #
    # @return [Object] `klass[value]` for Hash subclasses, `klass.new(value)`
    #   otherwise
    # @raise [ArgumentError] when the class name cannot be resolved
    #   (raised by #safe_const_get)
    def read_user_class
      klass_name = read(cache: false)
      klass = safe_const_get(klass_name)

      # Position the wrapped value is about to take in the object cache.
      slot = @object_cache.length
      value = read(cache: false)

      result = if klass < ::Hash
                 klass[value]
               else
                 klass.new(value)
               end

      # Overwrites the primitive's entry (or appends when nothing was cached).
      @object_cache[slot] = result

      result
    end

    # Reads a 'User Defined' object that has '_dump/self._load' methods
    #
    # @example
    #   class UserDefined
    #     def _dump(level)
    #       '_dumped'
    #     end
    #   end
    #
    #   UserDefined.new
    # is encoded as
    #   'u', :UserDefined, '_dumped'
    #
    # To load it back `UserDefined._load` must be used.
    #
    # The dumped payload is read without caching it; only the object returned
    # by `_load` is appended to the object cache.
    #
    # @return [Object] whatever the class's `_load` returns for the payload
    # @raise [ArgumentError] when the class name cannot be resolved
    #   (raised by #safe_const_get)
    def read_user_defined
      klass = safe_const_get(read(cache: false))
      payload = read_string(cache: false)

      loaded = klass._load(payload)
      @object_cache << loaded
      loaded
    end

    # Reads a 'User Marshal' object that has 'marshal_dump/marshal_load' methods
    #
    # @example
    #   class UserMarshal < Struct.new(:a, :b)
    #     def marshal_dump
    #       [a, b]
    #     end
    #
    #     def marshal_load(data)
    #       self.a, self.b = data
    #     end
    #   end
    #
    #   UserMarshal.new(100, 200)
    # is encoded as
    #   'U', :UserMarshal, [100, 200]
    #
    # To load it back `UserMarshal.allocate` and `UserMarshal#marshal_load` must be called
    #
    # The instance is allocated and cached before its payload is read, so it
    # takes a cache position ahead of any object stored inside the payload.
    #
    # @return [Object] the allocated instance after `marshal_load` received
    #   the decoded payload
    # @raise [ArgumentError] when the class name cannot be resolved
    #   (raised by #safe_const_get)
    def read_user_marshal
      klass = safe_const_get(read(cache: false))

      instance = klass.allocate
      @object_cache << instance

      instance.marshal_load(read(cache: false))
      instance
    end

    # Returns a constant by passed const_name,
    #  re-raises Marshal-specific error when it's missing
    #
    # @param const_name [String] name looked up with ::Object.const_get
    # @return [Object] the value of the constant
    # @raise [ArgumentError] in place of the NameError raised by the lookup
    def safe_const_get(const_name)
      ::Object.const_get(const_name)
    rescue ::NameError
      message = "undefined class/module #{const_name}"
      ::Kernel.raise ::ArgumentError, message
    end
  end
end