mongodb/bson-ruby

View on GitHub
perf/bench_test.rb

Summary

Maintainability
F
3 days
Test Coverage
# rubocop:todo all
# Copyright (C) 2009-2020 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

$:.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'bson'
require 'json'
require 'stringio'
require 'test/unit'
require 'benchmark'
require 'ruby-prof' unless RUBY_PLATFORM =~ /java/

class BenchTest < Test::Unit::TestCase
  RESET = 'reset'
  NON_ZERO_TIME = 0.0000000001 # 10^-10

  # Per-test hook: emits a blank separator line and stores the label column
  # width that the Benchmark.bm based reports in this file read.
  def setup
    $stdout.puts
    @label_width = 30
  end

  # Per-test hook: emits a blank separator line after each benchmark.
  def teardown
    $stdout.puts
  end

  # Runs the given block between two forced garbage collections and reports
  # how many objects were allocated in between.
  #
  # The allocation counter is published by GC.stat as
  # :total_allocated_objects on current Rubies and as :total_allocated_object
  # on Ruby 2.1, so both keys are tried (0 when neither is available).
  #
  # @yield the code whose allocations are to be counted
  # @return [Array] two elements: the block's return value and the number of
  #   objects allocated while it ran
  def gc_allocated
    total_allocated = lambda do
      stat = GC.stat
      stat[:total_allocated_objects] || stat[:total_allocated_object] || 0
    end
    GC.start
    before = total_allocated.call
    result = yield
    GC.start
    after = total_allocated.call
    [ result, after - before ]
  end

  # Prints one result line for measurement[j].
  #
  # The entry's :allocated total is replaced in place by the per-iteration
  # value (:allocated / :count). For every entry after the first, :base is set
  # to the first entry's user time and :gain to the relative improvement over
  # it (NON_ZERO_TIME keeps the divisor away from zero). Only fields that are
  # present are printed.
  #
  # @param measurement [Array<Hash>] measurements collected so far
  # @param j [Integer] index of the entry to print
  def print_measurement_and_gain(measurement, j)
    row = measurement[j]
    row[:allocated] = row[:allocated] / row[:count]
    if j > 0
      baseline = measurement[0][:utime]
      row[:base] = baseline
      row[:gain] = 1.0 - row[:utime] / (baseline + NON_ZERO_TIME)
    end
    templates = {
      label:     "label: \"%s\"",
      allocated: ", allocated: %d",
      utime:     ", user: %.1f",
      base:      ", base: %.1f",
      gain:      ", gain: %.2f"
    }
    templates.each do |key, template|
      value = row[key]
      print(template % value) if value
    end
    puts
  end

  # Benchmarks the given block once per candidate implementation.
  #
  # For each entry the installer is called first (it patches BSON), then the
  # block is run `count` times under Benchmark.measure and gc_allocated, and a
  # result line is printed. Afterwards the installer of the entry tagged RESET
  # is called again so that later tests see that implementation; when no entry
  # is tagged, the first entry is used.
  #
  # @param count [Integer] number of block invocations per candidate
  # @param method_label_pairs [Array<Array>] entries of the form
  #   [installer (responds to #call), label, optional RESET marker]
  # @yield [i] the code under test, given the iteration index
  def benchmark_methods_with_gc(count, method_label_pairs)
    measurement = []
    method_label_pairs.each_with_index do |(meth, label), j|
      meth.call
      htms, allocated = gc_allocated do
        tms = Benchmark.measure(label) do
          # Plain #times keeps enumerator overhead out of the timed region.
          count.times { |i| yield i }
        end
        Hash[[:label, :utime, :stime, :cutime, :cstime, :real].zip(tms.to_a)]
      end
      htms.merge!(allocated: allocated, count: count)
      measurement << htms
      print_measurement_and_gain(measurement, j)
    end
    # Enumerable#find's argument must be callable, so the fallback to the
    # first entry is expressed with || instead of being passed to find.
    reset_pair = method_label_pairs.find { |pair| pair[2] == RESET } || method_label_pairs.first
    reset_pair.first.call if reset_pair
  end

  # Optimization committed --------------------------------------------------------------------------------------------

  # Installs the baseline for the array index benchmark: evaluates a
  # `module Array` definition inside BSON whose `to_bson` converts every
  # element index on the fly with `index.to_s.to_bson_key(encoded)`.
  def old_array_index
    BSON.module_eval <<-EVAL
      module Array
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each_with_index do |value, index|
              encoded << value.bson_type
              index.to_s.to_bson_key(encoded)
              value.to_bson(encoded)
            end
          end
        end
      end
    EVAL
  end

  # Installs the optimized array encoder: the evaluated `module Array` builds
  # a frozen table of 1024 binary strings (index digits followed by
  # NULL_BYTE) and its `to_bson` appends the table entry for indexes below
  # 1024, falling back to `index.to_s.to_bson_cstring(encoded)` above that.
  def new_array_index_optimize
    BSON.module_eval <<-EVAL
      module Array
        @@_BSON_INDEX_SIZE = 1024
        @@_BSON_INDEX_ARRAY = ::Array.new(@@_BSON_INDEX_SIZE){|i| (i.to_s.force_encoding(BINARY) << NULL_BYTE).freeze}.freeze
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each_with_index do |value, index|
              encoded << value.bson_type
              if index < @@_BSON_INDEX_SIZE
                encoded << @@_BSON_INDEX_ARRAY[index]
              else
                index.to_s.to_bson_cstring(encoded)
              end
              value.to_bson(encoded)
            end
          end
        end
      end
    EVAL
  end

  # Compares the baseline and the table-based array index encoders by
  # encoding a 1024-element integer array 1,000 times with each.
  def test_array_index_optimization
    element_count = 1024
    sample = (0...element_count).to_a
    candidates = [
      [ method(:old_array_index),          'Array index optimize none' ],
      [ method(:new_array_index_optimize), 'Array index optimize 1024', RESET ] # Xeon user: 20.3, base: 33.2, gain: 0.39
    ]
    benchmark_methods_with_gc(1_000, candidates) { sample.to_bson }
  end

  # Installs the baseline Encodable#encode_with_placeholder_and_null inside
  # BSON: it appends PLACEHOLDER, yields, appends NULL_BYTE and then
  # overwrites the four bytes at the start position with the computed length
  # serialized through the generic `to_bson`.
  def old_encode_bson_with_placeholder
    BSON.module_eval <<-EVAL
      module Encodable
        def encode_with_placeholder_and_null(adjust, encoded = ''.force_encoding(BINARY))
          pos = encoded.bytesize
          encoded << PLACEHOLDER
          yield(encoded)
          encoded << NULL_BYTE
          encoded[pos, 4] = (encoded.bytesize - pos + adjust).to_bson
          encoded
        end
      end
    EVAL
  end

  # Installs a variant of Encodable#encode_with_placeholder_and_null that is
  # identical to the baseline except that the length is serialized with
  # `to_bson_int32('')` instead of the generic `to_bson`.
  def new_encode_bson_with_placeholder_to_bson_int32
    BSON.module_eval <<-EVAL
      module Encodable
        def encode_with_placeholder_and_null(adjust, encoded = ''.force_encoding(BINARY))
          pos = encoded.bytesize
          encoded << PLACEHOLDER
          yield(encoded)
          encoded << NULL_BYTE
          encoded[pos, 4] = (encoded.bytesize - pos + adjust).to_bson_int32('')
          encoded
        end
      end
    EVAL
  end

  # Installs a variant of Encodable#encode_with_placeholder_and_null that
  # back-patches the length with `encoded.set_int32(pos, length)` instead of
  # assigning a freshly serialized string slice.
  # NOTE(review): set_int32 is not defined in this file — presumably provided
  # by the C extension; confirm.
  def new_encode_bson_with_placeholder_set_int32
    BSON.module_eval <<-EVAL
      module Encodable
        def encode_with_placeholder_and_null(adjust, encoded = ''.force_encoding(BINARY))
          pos = encoded.bytesize
          encoded << PLACEHOLDER
          yield(encoded)
          encoded << NULL_BYTE
          encoded.set_int32(pos, encoded.bytesize - pos + adjust)
          encoded
        end
      end
    EVAL
  end

  # Compares the three length back-patching strategies of
  # encode_with_placeholder_and_null by encoding a two-entry string hash
  # 1,000,000 times with each.
  def test_encode_bson_with_placeholder
    last_index = 1
    document = (0..last_index).each_with_object({}) { |i, doc| doc["a#{i}"] = i.to_s }
    candidates = [
      [ method(:old_encode_bson_with_placeholder),               'Encode bson optimize to_bson' ],
      [ method(:new_encode_bson_with_placeholder_to_bson_int32), 'Encode bson optimize to_bson_int32' ],  # user: 22.2, base: 28.5, gain: 0.22
      [ method(:new_encode_bson_with_placeholder_set_int32),     'Encode bson optimize set_int32', RESET ] # user: 22.2, base: 28.5, gain: 0.22
    ]
    benchmark_methods_with_gc(1_000_000, candidates) { document.to_bson }
  end

  # Installs the baseline Integer#to_bson inside BSON: it first rejects values
  # failing `bson_int64?` via `out_of_range!` and only then chooses between
  # the int32 and int64 encoders.
  def old_integer_to_bson
    BSON.module_eval <<-EVAL
      module Integer
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          unless bson_int64?
            out_of_range!
          else
            bson_int32? ? to_bson_int32(encoded) : to_bson_int64(encoded)
          end
        end
      end
    EVAL
  end

  # Installs the reordered Integer#to_bson inside BSON: `bson_int32?` is
  # tested first, then `bson_int64?`, and `out_of_range!` is the last resort,
  # so values passing the int32 test need a single range check.
  def new_integer_to_bson
    BSON.module_eval <<-EVAL
      module Integer
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          if bson_int32?
            to_bson_int32(encoded)
          elsif bson_int64?
            to_bson_int64(encoded)
          else
            out_of_range!
          end
        end
      end
    EVAL
  end

  # Compares both Integer#to_bson test orders by encoding a hash of 1025
  # symbol keys mapped to the integers 0..1024, 2,000 times with each.
  def test_integer_to_bson_optimization
    last_index = 1024
    document = (0..last_index).each_with_object({}) { |i, doc| doc[:"a#{i}"] = i }
    candidates = [
      [ method(:old_integer_to_bson), 'Integer to_bson optimize none' ],
      [ method(:new_integer_to_bson), 'Integer to_bson optimize test order', RESET ]
    ]
    benchmark_methods_with_gc(2_000, candidates) { document.to_bson }
  end

  # Installs the baseline NilClass#to_bson inside BSON, which appends NO_VALUE
  # to the buffer.
  def old_nilclass_to_bson
    BSON.module_eval <<-EVAL
      module NilClass
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encoded << NO_VALUE
        end
      end
    EVAL
  end

  # Installs the no-op NilClass#to_bson inside BSON, which returns the buffer
  # untouched instead of appending NO_VALUE.
  def new_nilclass_to_bson
    BSON.module_eval <<-EVAL
      module NilClass
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encoded
        end
      end
    EVAL
  end

  # Compares the appending and the no-op NilClass#to_bson by calling
  # `nil.to_bson` 20,000,000 times with each.
  def test_nilclass_to_bson_optimization
    baseline  = [ method(:old_nilclass_to_bson), 'Nil to_bson optimize none' ]
    optimized = [ method(:new_nilclass_to_bson), 'Nil to_bson optimize noop', RESET ] # Core2 user: 4.9, base: 5.7, gain: 0.14
    benchmark_methods_with_gc(20_000_000, [ baseline, optimized ]) { nil.to_bson }
  end

  # C extension -------------------------------------------------------------------------------------------------------

  # Runs the block `count` times under Benchmark.measure and gc_allocated.
  #
  # @param count [Integer] number of block invocations
  # @param label [#to_s] label handed to Benchmark.measure
  # @yield [i] the code under test, given the iteration index
  # @return [Hash] the timing fields :label, :utime, :stime, :cutime,
  #   :cstime and :real plus :allocated (objects allocated during the whole
  #   run) and :count
  def benchmark_for_ext(count, label)
    htms, allocated = gc_allocated do
      tms = Benchmark.measure(label) do
        # Plain #times keeps enumerator overhead out of the timed region.
        count.times { |i| yield i }
      end
      Hash[[:label, :utime, :stime, :cutime, :cstime, :real].zip(tms.to_a)]
    end
    htms.merge!(allocated: allocated, count: count)
  end

  #label: "test_ext_rb_string_check_for_illegal_characters!", utime: 19.3, real: 19.7, allocated: 3
  #label: "test_ext_rb_string_check_for_illegal_characters!", utime: 16.3, real: 16.6, allocated: 4
  #gain: 0.15
  # Times 10,000,000 calls of String#to_bson_cstring on a short literal and
  # prints the resulting measurement hash.
  def test_ext_rb_string_check_for_illegal_characters!
    report = benchmark_for_ext(10_000_000, __method__) do
      "Hello World!".to_bson_cstring
    end
    p report
  end

  #label: test_ext_rb_float_to_bson, utime: 15.4, real: 16.1, allocated: 3
  #label: test_ext_rb_float_to_bson, utime: 6.1, real: 6.3, allocated: 1
  #gain: 0.61
  # Times 10,000,000 calls of Float#to_bson and prints the resulting
  # measurement hash.
  def test_ext_rb_float_to_bson
    report = benchmark_for_ext(10_000_000, __method__) do
      3.14159.to_bson
    end
    p report
  end

  #label: "test_ext_rb_time_to_bson", utime: 26.5, real: 26.6, allocated: 6
  #label: "test_ext_rb_time_to_bson", utime: 13.3, real: 13.4, allocated: 4
  #gain: 0.50
  # Times 10,000,000 calls of Time#to_bson on a single Time captured before
  # the run and prints the resulting measurement hash.
  def test_ext_rb_time_to_bson
    moment = Time.now
    report = benchmark_for_ext(10_000_000, __method__) do
      moment.to_bson
    end
    p report
  end

  #label: "test_ext_rb_integer_to_bson_key_large", utime: 18.9, real: 19.1, allocated: 1
  #label: "test_ext_rb_integer_to_bson_key_large", utime: 3.7, real: 3.8, allocated: 0
  #gain: 0.80
  # Times 10,000,000 calls of Integer#to_bson_key using the iteration index
  # as the receiver; one binary buffer is reused and cleared after every call.
  def test_ext_rb_integer_to_bson_key_large
    buffer = ''.force_encoding(BSON::BINARY)
    report = benchmark_for_ext(10_000_000, __method__) do |i|
      i.to_bson_key(buffer)
      buffer.clear
    end
    p report
  end

  #label: "test_ext_rb_integer_to_bson_key_small", utime: 33.5, real: 34.2, allocated: 0
  #label: "test_ext_rb_integer_to_bson_key_small", utime: 25.4, real: 25.8, allocated: 0
  #gain: 0.24
  # Times 100,000,000 calls of Integer#to_bson_key on the constant 1023; one
  # binary buffer is reused and cleared after every call.
  def test_ext_rb_integer_to_bson_key_small
    buffer = ''.force_encoding(BSON::BINARY)
    report = benchmark_for_ext(100_000_000, __method__) do |_i|
      1023.to_bson_key(buffer)
      buffer.clear
    end
    p report
  end

  #label: "test_ext_rb_symbol_to_bson", utime: 36.5, real: 37.0, allocated: 5
  #label: "test_ext_rb_symbol_to_bson", utime: 24.2, real: 24.3, allocated: 3
  #gain: 0.34
  # rb_symbol_to_bson - no C ext, just benefit from other C ext functions
  # Times 10,000,000 calls of Symbol#to_bson (each call uses the encoder's
  # default buffer) and prints the resulting measurement hash.
  def test_ext_rb_symbol_to_bson
    p (benchmark_for_ext(10_000_000, __method__) { :my_symbol.to_bson })
  end

  # Optimization NOT committed ----------------------------------------------------------------------------------------

  # MongoDB driver overrides ------------------------------------------------------------------------------------------

  # Installs the baseline String#to_bson_key inside BSON, which simply
  # delegates to `to_bson_cstring`.
  def old_string_to_bson_key
    BSON.module_eval <<-EVAL
      module String
        def to_bson_key(encoded = ''.force_encoding(BINARY))
          to_bson_cstring(encoded)
        end
      end
    EVAL
  end

  # Installs a String#to_bson_key variant that additionally reads the
  # @bson_key_check_skip instance variable from the buffer before delegating
  # to `to_bson_cstring`. The `nil if ...` statement discards its result, so
  # the variant only adds the cost of the flag lookup.
  def new_string_to_bson_key_flag
    BSON.module_eval <<-EVAL
      module String
        def to_bson_key(encoded = ''.force_encoding(BINARY))
          nil if encoded.instance_variable_get(:@bson_key_check_skip)
          to_bson_cstring(encoded)
        end
      end
    EVAL
  end

  # Installs a String#to_bson_key variant that validates the key before
  # delegating to `to_bson_cstring`: unless the buffer carries a truthy
  # @bson_key_check_skip instance variable, a key starting with '$' or
  # containing '.' raises. The `\#` escapes keep the interpolation inside the
  # evaluated code rather than in this heredoc.
  def new_string_to_bson_key_mongodb
    BSON.module_eval <<-EVAL
      module String
        def to_bson_key(encoded = ''.force_encoding(BINARY))
          check_for_illegal_mongodb_key_characters!(encoded)
          to_bson_cstring(encoded)
        end

        def check_for_illegal_mongodb_key_characters!(encoded)
          unless encoded.instance_variable_get(:@bson_key_check_skip)
            raise "key \#{self.inspect} must not start with '$'" if self[0] == ?$
            raise "key \#{self.inspect} must not contain '.'"   if self.include? ?.
          end
        end
      end
    EVAL
  end

  #label: "string to_bson_key", allocated: 2, user: 18.3
  #label: "string to_bson_key flag check", allocated: 2, user: 19.6, base: 18.3, gain: -0.07
  #label: "string to_bson_key mongodb", allocated: 2, user: 20.6, base: 18.3, gain: -0.12
  # Compares the three String#to_bson_key variants with 10,000,000 encodings
  # of 'email_address' into one reused buffer. The buffer carries
  # @bson_key_check_skip = true, so the validating variant skips its checks.
  def test_string_to_bson_key_mongodb
    buffer = ''
    buffer.instance_variable_set(:@bson_key_check_skip, true)
    candidates = [
      [ method(:old_string_to_bson_key),         'string to_bson_key', RESET ],
      [ method(:new_string_to_bson_key_flag),    'string to_bson_key flag check' ],
      [ method(:new_string_to_bson_key_mongodb), 'string to_bson_key mongodb' ] # Core2 user: 29.5, base: 29.0, gain: -0.02
    ]
    benchmark_methods_with_gc(10_000_000, candidates) do
      'email_address'.to_bson_key(buffer)
      buffer.clear
    end
  end

  # Discarded as not worthy -------------------------------------------------------------------------------------------

  #                                     user     system      total        real
  #test_encode_twitter            289.520000   0.900000 290.420000 (294.547515) to_bson no hint pure
  #allocated: 11563746 allocated/line: 224
  #test_encode_twitter            293.320000   0.910000 294.230000 (298.737329) to_bson hint pure
  #allocated: 11423424 allocated/line: 222

  # Installs the baseline Hash#to_bson inside BSON: for every entry it writes
  # the value's bson_type, the key via `to_bson_key`, and the value via
  # `to_bson` without passing a type hint.
  def old_hash_to_bson_no_hint
    BSON.module_eval <<-EVAL
      module Hash
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each do |field, value|
              encoded << value.bson_type
              field.to_bson_key(encoded)
              value.to_bson(encoded)
            end
          end
        end
      end
    EVAL
  end

  # Installs the "hint" variant inside BSON:
  # * Hash#to_bson passes each value's already computed bson_type on to
  #   `value.to_bson` as the hint;
  # * Integer#to_bson picks the int32/int64 encoder straight from the hint
  #   and only falls back to the range checks when no matching hint is given;
  # * String#to_bson accepts the hint parameter but does not use it.
  def new_hash_to_bson_hint
    BSON.module_eval <<-EVAL
      module Hash
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each do |field, value|
              encoded << (bson_type = value.bson_type)
              field.to_bson_key(encoded)
              value.to_bson(encoded, bson_type)
            end
          end
        end
      end
      module Integer
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          if hint == Int32::BSON_TYPE
            to_bson_int32(encoded)
          elsif hint == Int64::BSON_TYPE
            to_bson_int64(encoded)
          elsif bson_int32?
            to_bson_int32(encoded)
          elsif bson_int64?
            to_bson_int64(encoded)
          else
            out_of_range!
          end
        end
      end
      module String
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(STRING_ADJUST, encoded) do |encoded|
            to_bson_string(encoded)
          end
        end
      end
    EVAL
  end

  # Compares Hash#to_bson without and with type hints on a hash of 1025
  # string keys mapped to the integers 0..1024, 4,000 encodings each.
  def test_hash_integer_to_bson_hint
    last_index = 1024
    document = (0..last_index).each_with_object({}) { |i, doc| doc["a#{i}"] = i }
    candidates = [
      [ method(:old_hash_to_bson_no_hint), 'Hash integer to_bson no hint' ],
      [ method(:new_hash_to_bson_hint),    'Hash integer to_bson hint', RESET ] # Core2 user: 25.1, base: 33.8, gain: 0.26
    ]
    benchmark_methods_with_gc(4_000, candidates) { document.to_bson }
  end

  # Checks the overhead of computing and passing the hint when it is not
  # used: a hash of 1025 string keys mapped to string values is encoded 4,000
  # times with the no-hint and the hint variant.
  def test_hash_string_to_bson_hint # to check overhead of hint setting and passing
    last_index = 1024
    document = (0..last_index).each_with_object({}) { |i, doc| doc["a#{i}"] = i.to_s }
    candidates = [
      [ method(:old_hash_to_bson_no_hint), 'Hash string to_bson no hint', RESET ],
      [ method(:new_hash_to_bson_hint),    'Hash string to_bson hint' ] # Core2 user: 19.8, base: 19.7, gain: -0.00
    ]
    benchmark_methods_with_gc(4_000, candidates) { document.to_bson }
  end

  # Installs the baseline Hash#to_bson inside BSON used by the key and
  # integer optimization benchmarks: type byte, key via `to_bson_key`, value
  # via `to_bson`, for every entry.
  def old_hash_to_bson
    BSON.module_eval <<-EVAL
      module Hash
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each do |field, value|
              encoded << value.bson_type
              field.to_bson_key(encoded)
              value.to_bson(encoded)
            end
          end
        end
      end
    EVAL
  end

  # Installs the memoizing Hash#to_bson (v0) inside BSON. Encoded keys are
  # cached in a class-level hash guarded by a Mutex. Hashes with fewer than
  # 65535 entries use `_memo_set`, which stores the encoded key in the cache;
  # larger hashes use `_memo_fetch`, which reads the cache but never adds to
  # it.
  def new_hash_to_bson_v0
    # if-else seems to work better than setting a variable to method
    # pending - mutex
    BSON.module_eval <<-EVAL
      module Hash
        @@_memo_threshold = 65535
        @@_memo_hash = ::Hash.new
        @@_memo_mutex = Mutex.new
        def _memo_set(field)
          @@_memo_mutex.synchronize do
            @@_memo_hash[field] = @@_memo_hash.fetch(field) { yield }
          end
        end
        def _memo_fetch(field)
          @@_memo_mutex.synchronize do
            @@_memo_hash.fetch(field) { yield }
          end
        end
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          if size < @@_memo_threshold
            encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
              each do |field, value|
                encoded << value.bson_type
                encoded << _memo_set(field) { field.to_bson_key }
                value.to_bson(encoded)
              end
            end
          else
            encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
              each do |field, value|
                encoded << value.bson_type
                encoded << _memo_fetch(field) { field.to_bson_key }
                value.to_bson(encoded)
              end
            end
          end
        end
      end
    EVAL
  end

  # Installs the memoizing Hash#to_bson (v1) inside BSON: encoded keys are
  # cached in a class-level hash through `_memo`, with no size threshold and
  # no locking around the cache.
  def new_hash_to_bson_v1
    BSON.module_eval <<-EVAL
      module Hash
        @@_memo_hash = ::Hash.new
        def _memo(field)
          @@_memo_hash[field] = @@_memo_hash.fetch(field) { yield }
        end
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each do |field, value|
              encoded << value.bson_type
              encoded << _memo(field) { field.to_bson_key }
              value.to_bson(encoded)
            end
          end
        end
      end
    EVAL
  end

  # Installs the "assume int32" variant inside BSON: Integer#bson_type always
  # answers INT32_TYPE, and Hash#to_bson rewrites the already written type
  # byte to INT64_TYPE afterwards when the value turned out to occupy 8 bytes.
  def new_hash_to_bson_integer
    BSON.module_eval <<-EVAL
      module Integer
        def bson_type
          Integer::INT32_TYPE
        end
      end
      module Hash
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encode_with_placeholder_and_null(BSON_ADJUST, encoded) do |encoded|
            each do |field, value|
              pos = encoded.bytesize
              encoded << (bson_type = value.bson_type)
              field.to_bson_key(encoded)
              mark = encoded.bytesize
              value.to_bson(encoded)
              encoded[pos] = Integer::INT64_TYPE if bson_type == Integer::INT32_TYPE && encoded.bytesize - mark == 8
            end
          end
        end
      end
    EVAL
  end

  # without extension 0.23 gain, with extension -0.11 gain
  # Compares the baseline Hash#to_bson with the "assume int32" variant on a
  # hash of 1025 symbol keys mapped to the integers 0..1024, 4,000 encodings
  # each.
  def test_integer_optimization
    last_index = 1024
    document = (0..last_index).each_with_object({}) { |i, doc| doc[:"a#{i}"] = i }
    candidates = [
      [ method(:old_hash_to_bson),         'Integer optimize none', RESET ],
      [ method(:new_hash_to_bson_integer), 'Integer optimize int32' ] # Core2 user: 68.2, base: 88.1, gain: 0.23
    ]
    benchmark_methods_with_gc(4_000, candidates) { document.to_bson }
  end

  # Compares the baseline Hash#to_bson with both key-memoizing variants on a
  # hash of 1025 symbol keys mapped to the integers 0..1024, 2,000 encodings
  # each.
  def test_symbol_key_optimization
    last_index = 1024
    document = (0..last_index).each_with_object({}) { |i, doc| doc[:"a#{i}"] = i }
    candidates = [
      [ method(:old_hash_to_bson),    'Symbol key optimize none', RESET ],
      [ method(:new_hash_to_bson_v0), 'Symbol key optimize hash key v0' ], # Xeon user: 33.4, base: 35.9, gain: 0.07
      [ method(:new_hash_to_bson_v1), 'Symbol key optimize hash key v1' ]  # Xeon user: 26.4, base: 35.9, gain: 0.26
    ]
    benchmark_methods_with_gc(2_000, candidates) { document.to_bson }
  end

  # Compares the baseline Hash#to_bson with both key-memoizing variants on a
  # hash of 1025 String keys mapped to the integers 0..1024, 2,000 encodings
  # each. The labels name the key type so the output can be told apart from
  # the symbol-key benchmark.
  def test_string_key_optimization
    size = 1024
    hash = Hash[*(0..size).to_a.collect{|i| [ ('a' + i.to_s), i]}.flatten]
    method_label_pairs = [
        [ method(:old_hash_to_bson),    'String key optimize none', RESET ],
        [ method(:new_hash_to_bson_v0), 'String key optimize hash key v0' ], # Xeon user: 34.5, base: 32.6, gain: -0.06
        [ method(:new_hash_to_bson_v1), 'String key optimize hash key v1' ] # Xeon user: 27.5, base: 32.6, gain: 0.15
    ]
    benchmark_methods_with_gc(2_000, method_label_pairs) { hash.to_bson }
  end

  # Installs the baseline Time#to_bson inside BSON: milliseconds are derived
  # from the floating point `to_f` value, truncated with `to_i`, and packed
  # with Int64::PACK.
  def old_time_to_bson
    BSON.module_eval <<-EVAL
      module Time
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encoded << [ (to_f * 1000.0).to_i ].pack(Int64::PACK)
        end
      end
    EVAL
  end

  # Installs the integer-arithmetic Time#to_bson inside BSON: milliseconds
  # since the epoch are computed as `to_i * 1000 + usec / 1000` and packed
  # with Int64::PACK, avoiding the Float round trip of the baseline.
  #
  # `to_i` (whole seconds since the epoch) is used rather than `sec`, which
  # is only the seconds-within-the-minute field (0..60) and would encode a
  # different value than the baseline this variant is compared against.
  def new_time_to_bson
    BSON.module_eval <<-EVAL
      module Time
        def to_bson(encoded = ''.force_encoding(BINARY), hint = nil)
          encoded << [ (to_i * 1000 + usec / 1000) ].pack(Int64::PACK)
        end
      end
    EVAL
  end

  # Compares the Float-based and the integer-based Time#to_bson with
  # 10,000,000 encodings of one Time captured before the run.
  def test_time_to_bson_optimization
    moment = Time.now
    baseline  = [ method(:old_time_to_bson), 'time to_bson optimize none' ]
    optimized = [ method(:new_time_to_bson), 'time to_bson optimize sec usec', RESET ] # Core2 user: 29.5, base: 29.0, gain: -0.02
    benchmark_methods_with_gc(10_000_000, [ baseline, optimized ]) { moment.to_bson }
  end

  # Installs the baseline `from_bson` in `module Hash` inside BSON: the four
  # leading bytes are consumed with `bson.read(4)`, then elements are read
  # until the NULL_BYTE terminator, each decoded by the class that
  # Registry.get returns for its type byte.
  def old_hash_from_bson
    BSON.module_eval <<-EVAL
      module Hash
        def from_bson(bson)
          hash = new
          bson.read(4) # Swallow the first four bytes.
          while (type = bson.readbyte.chr) != NULL_BYTE
            field = bson.gets(NULL_BYTE).from_bson_string.chop!
            hash[field] = Registry.get(type).from_bson(bson)
          end
          hash
        end
      end
    EVAL
  end

  # Installs the `seek` variant of `from_bson` in `module Hash` inside BSON:
  # identical to the baseline except that the four leading bytes are skipped
  # with `bson.seek(4, IO::SEEK_CUR)` instead of being read.
  def new_hash_from_bson
    BSON.module_eval <<-EVAL
      module Hash
        def from_bson(bson)
        hash = new
          bson.seek(4, IO::SEEK_CUR) # Swallow the first four bytes.
          while (type = bson.readbyte.chr) != NULL_BYTE
            field = bson.gets(NULL_BYTE).from_bson_string.chop!
            hash[field] = Registry.get(type).from_bson(bson)
          end
          hash
        end
      end
    EVAL
  end

  #review with just op, without hash overhead, check allocation
  # Switches between the `read` and `seek` flavours of from_bson and encodes a
  # two-entry string hash 2,000,000 times with each.
  # NOTE(review): the timed block calls `to_bson`, while the two candidates
  # only replace `from_bson` — the seek change does not look exercised by
  # this measurement; confirm the intent before relying on the numbers.
  def test_seek
    last_index = 1
    document = (0..last_index).each_with_object({}) { |i, doc| doc["a#{i}"] = i.to_s }
    candidates = [
      [ method(:old_hash_from_bson), 'Encode bson optimize none', RESET ],
      [ method(:new_hash_from_bson), 'Encode bson optimize seek' ] # Xeon user: 28.2, base: 28.3, gain: 0.00
    ]
    benchmark_methods_with_gc(2_000_000, candidates) { document.to_bson }
  end

  # Installs the baseline Integer#bson_int32? inside BSON: a two-sided
  # comparison against MIN_32BIT and MAX_32BIT.
  def old_integer_bson_int32?
    BSON.module_eval <<-EVAL
      module Integer
        def bson_int32?
          (MIN_32BIT <= self) && (self <= MAX_32BIT)
        end
      end
    EVAL
  end

  # Installs the bit-mask Integer#bson_int32? inside BSON: the receiver is
  # ANDed with `-1 << 32` and the result compared with zero.
  # NOTE(review): that mask is zero exactly for 0 <= self < 2**32, so
  # negative values are rejected and values in 2**31...2**32 are accepted —
  # presumably not the same range as the MIN_32BIT/MAX_32BIT comparison it is
  # benchmarked against; confirm before reusing.
  def new_integer_bson_int32?
    BSON.module_eval <<-EVAL
      module Integer
        @@FIXNUM_HIGHBITS32 = (-1 << 32)
        def bson_int32?
          (self & @@FIXNUM_HIGHBITS32) == 0
        end
      end
    EVAL
  end

  # Compares the comparison-based and the bit-mask Integer#bson_int32? by
  # calling it on the iteration index 100,000,000 times with each.
  def test_bson_int32?
    baseline  = [ method(:old_integer_bson_int32?), 'Integer#bson_int32? old', RESET ]
    candidate = [ method(:new_integer_bson_int32?), 'Integer#bson_int32? new' ] # user: 34.9, base: 21.4, gain: -0.63
    benchmark_methods_with_gc(100_000_000, [ baseline, candidate ]) do |index|
      index.bson_int32?
    end
  end

  # Statistics and Ruby-prof profiling --------------------------------------------------------------------------------

  # pure Ruby - Core2
  #utime: 13.92, allocated: 10, label: "BSON::CodeWithScope"
  #utime: 6.12, allocated:  4, label: "Hash"
  #utime: 6.11, allocated:  4, label: "BSON::Document"
  #utime: 4.61, allocated:  5, label: "Regexp"
  #utime: 4.09, allocated:  4, label: "Array"
  #utime: 3.70, allocated:  4, label: "Symbol"
  #utime: 3.50, allocated: 10, label: "Bignum"
  #utime: 3.27, allocated:  3, label: "BSON::Code"
  #utime: 3.21, allocated:  3, label: "String"
  #utime: 2.54, allocated:  5, label: "Time"
  #utime: 2.08, allocated:  2, label: "BSON::Binary"
  #utime: 1.57, allocated:  0, label: "BSON::Timestamp"
  #utime: 1.04, allocated:  2, label: "Float"
  #utime: 0.93, allocated:  0, label: "Fixnum"
  #utime: 0.42, allocated:  0, label: "BSON::ObjectId"
  #utime: 0.29, allocated:  0, label: "TrueClass"
  #utime: 0.29, allocated:  0, label: "FalseClass"
  #utime: 0.19, allocated:  0, label: "BSON::MinKey"
  #utime: 0.19, allocated:  0, label: "BSON::MaxKey"
  #utime: 0.19, allocated:  0, label: "NilClass"
  #utime: 0.19, allocated:  0, label: "BSON::Undefined"
  # with C extension - Core2
  #utime: 6.12, allocated:  6, label: "BSON::CodeWithScope"
  #utime: 3.70, allocated:  7, label: "Regexp"
  #utime: 3.17, allocated:  3, label: "Hash"
  #utime: 3.16, allocated:  3, label: "BSON::Document"
  #utime: 1.89, allocated:  3, label: "Symbol"
  #utime: 1.89, allocated:  2, label: "Array"
  #utime: 1.53, allocated:  2, label: "BSON::Code"
  #utime: 1.47, allocated:  2, label: "String"
  #utime: 1.30, allocated:  3, label: "Time"
  #utime: 0.97, allocated:  0, label: "BSON::Binary"
  #utime: 0.57, allocated:  0, label: "Bignum"
  #utime: 0.38, allocated:  0, label: "BSON::ObjectId"
  #utime: 0.38, allocated:  0, label: "BSON::Timestamp"
  #utime: 0.29, allocated:  0, label: "Fixnum"
  #utime: 0.22, allocated:  0, label: "Float"
  #utime: 0.21, allocated:  0, label: "FalseClass"
  #utime: 0.20, allocated:  0, label: "TrueClass"
  #utime: 0.20, allocated:  0, label: "BSON::MaxKey"
  #utime: 0.20, allocated:  0, label: "BSON::MinKey"
  #utime: 0.20, allocated:  0, label: "BSON::Undefined"
  #utime: 0.19, allocated:  0, label: "NilClass"
  # For one sample value of each encodable type, measures 1,000,000 `to_bson`
  # calls into a reused binary buffer (cleared after every call) and prints
  # user time and per-call allocations, sorted by descending user time.
  def test_to_bson_object_allocation
    iterations = 1_000_000
    now = Time.now
    samples = [
      [1],
      BSON::Binary.new("xyzzy"),
      BSON::Code.new("new Object;"),
      BSON::CodeWithScope.new("new Object;", {x: 1}),
      BSON::Document['x', 1],
      false,
      3.14159,
      { 'x' => 1 },
      2**31 - 1,
      2**63 - 1,
      BSON::MaxKey.new,
      BSON::MinKey.new,
      nil,
      BSON::ObjectId.new,
      /xyzzy/,
      'xyzzy',
      :xyzzy,
      Time.now,
      BSON::Timestamp.new(now.sec, now.usec),
      true,
      BSON::Undefined.new
    ]
    rows = samples.map do |sample|
      timings, allocated = gc_allocated do
        tms = Benchmark.measure(sample.class.name) do
          buffer = ''.force_encoding(BSON::BINARY)
          iterations.times do
            sample.to_bson(buffer)
            buffer.clear
          end
        end
        Hash[*[:label, :utime, :stime, :cutime, :cstime, :real].zip(tms.to_a).flatten]
      end
      timings.merge!({allocated: allocated, count: iterations})
    end
    rows.sort! { |left, right| right[:utime] <=> left[:utime] }
    rows.each do |row|
      per_call = row[:allocated] / row[:count]
      puts "utime: #{'%.2f' % row[:utime]}, allocated: #{'%2d' % per_call}, label: #{row[:label].inspect}"
    end
  end

  # Recursively counts the objects in a parsed JSON structure by class name.
  #
  # Arrays and Hashes are counted and then traversed with `each`; a Hash
  # therefore contributes one Array per entry (the [key, value] pair) plus
  # the key and the value themselves. Integers are accepted under all of
  # their historical class names ('Fixnum'/'Bignum' before Ruby 2.4,
  # 'Integer' since). Any other class is printed and terminates the process.
  #
  # @param tally [Hash] counters keyed by class name; must default to 0
  # @param obj [Object] the structure (or leaf value) to count
  def doc_stats(tally, obj)
    type_name = obj.class.name
    tally[type_name] += 1
    case type_name
      when 'Array', 'Hash'
        obj.each {|elem| doc_stats(tally, elem) }
      when 'Fixnum', 'Bignum', 'Integer', 'Float', 'String',
           'NilClass', 'TrueClass', 'FalseClass'
        nil
      else
        p obj.class
        exit
    end
  end

  #0.44 String 811731
  #0.35 Array 646586
  #0.07 NilClass 120515
  #0.06 Fixnum 120181
  #0.05 FalseClass 89655
  #0.02 Hash 44144
  #0.01 TrueClass 18245
  #0.00 Float 996
  #objects: 1852053
  #objects/doc: 185
  # Prints a per-class object tally for up to 10,000 documents of the Twitter
  # sample file (one JSON document per line): each class with its share of
  # all objects, then the total and the average number of objects per
  # document.
  #
  # At most `line_limit` lines are read; a shorter file is processed as far
  # as it goes, and the average is taken over the documents actually read.
  def test_doc_stats
    json_filename = '../../training/data/sampledata/twitter.json'
    line_limit = 10_000
    twitter = File.open(json_filename, 'r') do |f|
      f.each_line.first(line_limit).collect {|line| JSON.parse(line) }
    end
    tally = Hash.new(0)
    doc_stats(tally, twitter)
    obj_count = tally.inject(0){|sum, elem| sum + elem[1]}
    tally = tally.to_a.sort{|a,b| b[1] <=> a[1]}
    tally.each {|a| puts "#{'%.2f' % (a[1].to_f / obj_count.to_f)} #{a[0]} #{a[1]}" }
    puts "objects: #{obj_count}"
    puts "objects/doc: #{twitter.empty? ? 0 : obj_count / twitter.size}"
  end

  # Loads documents from the Twitter sample file (one JSON document per
  # line).
  #
  # @param line_limit [Integer] maximum number of lines to load; a negative
  #   value is used as an end index counted from the end of the file, so -1
  #   loads every line
  # @param bson [Boolean] when truthy each document is encoded with `to_bson`
  #   and wrapped in a StringIO; otherwise the parsed document is returned
  # @return [Array] the parsed documents or StringIO wrappers
  def get_twitter_data(line_limit, bson)
    json_filename = '../../training/data/sampledata/twitter.json'
    lines = File.open(json_filename, 'r') do |f|
      # first(n) yields exactly n lines; an inclusive 0..n range would
      # return n + 1.
      line_limit < 0 ? f.readlines[0..line_limit] : f.each_line.first(line_limit)
    end
    lines.collect do |line|
      doc = JSON.parse(line)
      bson ? StringIO.new(doc.to_bson) : doc
    end
  end

  # Loads every document of the Twitter sample, encodes each with `to_bson`
  # under Benchmark.bm and gc_allocated, and prints the total and per-document
  # allocation counts.
  def test_encode_twitter
    documents = get_twitter_data(-1, false)
    allocated = nil
    Benchmark.bm(@label_width) do |bench|
      bench.report(__method__) do
        _result, allocated = gc_allocated do
          documents.each do |doc|
            doc.to_bson
          end
        end
      end
    end
    per_line = allocated / documents.size
    puts "allocated: #{allocated} allocated/line: #{per_line}"
  end

  # Decodes every document of the Twitter sample under Benchmark.bm and
  # gc_allocated and prints the total and per-document allocation counts.
  #
  # The documents are loaded and BSON-encoded once, before the measurement;
  # only rewinding each StringIO and `Hash.from_bson` are timed, so JSON
  # parsing and encoding do not count towards the decode figures.
  def test_decode_twitter
    twitter = get_twitter_data(-1, true)
    allocated = nil
    Benchmark.bm(@label_width) do |bench|
      bench.report(__method__) do
        _result, allocated = gc_allocated do
          twitter.each {|io| io.rewind; Hash.from_bson(io) }
        end
      end
    end
    puts "allocated: #{allocated} allocated/line: #{allocated/twitter.size}"
  end

  # Profiles the given block with RubyProf while also timing it with
  # Benchmark.bm and counting allocations, then writes a flat and a graph
  # report to `file_name`.
  #
  # @param label [String] label for the Benchmark report line
  # @param bson [Boolean] accepted but not used by this method
  # @param file_name [String] path of the profile report to (over)write
  # @yield the code to profile; must return the number of processed lines,
  #   which is used for the allocations-per-line figure
  def ruby_prof(label, bson, file_name)
    allocated = line_limit = profile = nil
    Benchmark.bm(@label_width) do |bench|
      bench.report(label) do
        _result, allocated = gc_allocated do
          RubyProf.start
          line_limit = yield
          profile = RubyProf.stop
        end
      end
    end
    per_line = allocated / line_limit
    puts "allocated: #{allocated} allocated/line: #{per_line}"
    File.open(file_name, 'w') do |report|
      flat  = RubyProf::FlatPrinter.new(profile)
      flat.print(report)
      graph = RubyProf::GraphPrinter.new(profile)
      graph.print(report, {})
    end
  end

  # Profiles BSON encoding of Twitter sample documents (loaded with a limit of
  # 10,000) and writes the report to 'encode-ruby-prof.out'.
  def test_encode_ruby_prof
    documents = get_twitter_data(10_000, false)
    ruby_prof('test encode ruby prof', false, 'encode-ruby-prof.out') do
      documents.each { |doc| doc.to_bson }.size
    end
  end

  # Profiles BSON decoding of pre-encoded Twitter sample documents (loaded
  # with a limit of 10,000): every StringIO is rewound and decoded with
  # Hash.from_bson; the report goes to 'decode-ruby-prof.out'.
  def test_decode_ruby_prof
    streams = get_twitter_data(10_000, true)
    ruby_prof('test decode ruby prof', true, 'decode-ruby-prof.out') do
      streams.each do |io|
        io.rewind
        Hash.from_bson(io)
      end
      streams.size
    end
  end

end