lib/bson/binary.rb
# frozen_string_literal: true
# Copyright (C) 2009-2020 MongoDB Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require 'base64'
module BSON
# Represents binary data.
#
# @see http://bsonspec.org/#/specification
#
# @since 2.0.0
class Binary
include JSON
# The BSON type marker for binary values.
#
# A binary is type 0x05 in the BSON spec. The marker is a frozen
# one-character string built from 5.chr, created with the encoding named by
# the BINARY constant (which is not defined in this class).
#
# @since 2.0.0
BSON_TYPE = ::String.new(5.chr, encoding: BINARY).freeze
# The mappings of subtypes to their single byte identifiers.
#
# Keys are the symbolic subtype names accepted by the constructor; values
# are the one-character strings produced by Integer#chr for the subtype
# number. The hash itself is frozen, but the value strings are not
# individually frozen.
#
# @note subtype 6 (ciphertext) is used for the Client-Side Encryption
# feature. Data represented by this subtype is often encrypted, but
# may also be plaintext. All instances of this subtype necessary for
# Client-Side Encryption will be created internally by the Ruby driver.
# An application should not create new BSON::Binary objects of this subtype.
#
# @note the :user entry maps to 128, which is the same value as
# USER_SUBTYPE (0x80), i.e. the first byte of the user-defined range.
#
# @since 2.0.0
SUBTYPES = {
generic: 0.chr,
function: 1.chr,
old: 2.chr,
uuid_old: 3.chr,
uuid: 4.chr,
md5: 5.chr,
ciphertext: 6.chr,
column: 7.chr,
sensitive: 8.chr,
user: 128.chr,
}.freeze
# The starting point of the user-defined subtype range.
#
# Subtype bytes at or above this value are treated as user-defined:
# from_bson passes such a byte through as-is instead of looking it up in
# TYPES, and validate_integer_type! maps every such value to :user.
USER_SUBTYPE = 0x80
# The mappings of single byte subtypes to their symbol counterparts.
#
# This is the inverse of SUBTYPES: each one-character subtype string maps
# back to its symbolic name.
#
# @since 2.0.0
TYPES = SUBTYPES.invert.freeze
# @return [ String ] The raw binary data.
#
# The string is always stored in BINARY encoding (the constructor
# re-encodes a copy of any string that is not already BINARY).
#
# @since 2.0.0
attr_reader :data
# @return [ Symbol ] The binary type.
#
# This is one of the keys of SUBTYPES. Every subtype byte at or above
# USER_SUBTYPE is reported as :user; use raw_type to tell them apart.
attr_reader :type
# @return [ String ] The raw type value, as an encoded integer.
#
# This is the one-character string that to_bson writes as the subtype
# byte.
attr_reader :raw_type
# Determine if this binary object is equal to another object.
#
# Two binaries are equal when they have the same symbolic type, the same
# raw subtype byte and the same data. The raw subtype byte must be part of
# the comparison because every user-defined subtype (0x80 and above) is
# reported as the symbolic type :user; comparing only the symbolic type
# would make, for example, subtypes 0x80 and 0x81 indistinguishable even
# though they serialize to different BSON.
#
# @example Check the binary equality.
# binary == other
#
# @param [ Object ] other The object to compare against.
#
# @return [ true, false ] If the objects are equal.
#
# @since 2.0.0
def ==(other)
  return false unless other.is_a?(Binary)

  type == other.type && raw_type == other.raw_type && data == other.data
end
alias eql? ==
# Computes the hash code for this binary, derived from its data and its
# symbolic type.
#
# Defining this alongside == / eql? allows Binary instances to be used as
# hash keys.
#
# @return [ Integer ] The hash code.
#
# @since 2.3.1
def hash
  key_parts = [data, type]
  key_parts.hash
end
# Application-level JSON serialization hook.
#
# BSON::Binary is used exclusively in BSON-related contexts, so this simply
# delegates to as_extended_json with no options, producing the canonical
# Extended JSON representation. Any positional arguments are accepted and
# ignored.
#
# @return [ Hash ] The extended json representation.
def as_json(*_args)
  as_extended_json
end
# Converts this object to a representation directly serializable to
# Extended JSON (https://github.com/mongodb/specifications/blob/master/source/extended-json.rst).
#
# The subtype is rendered as a two-digit lowercase hexadecimal string and
# the data as a single-line base64 string (no embedded line feeds,
# regardless of the data length).
#
# @option opts [ nil | :relaxed | :legacy ] :mode Serialization mode
# (default is canonical extended JSON). Only :legacy changes the shape of
# the result; every other mode yields the canonical form.
#
# @return [ Hash ] The extended json representation.
def as_extended_json(**options)
  # @raw_type is a single byte; 'H*' renders it as exactly two hex digits.
  subtype = @raw_type.unpack1('H*')
  # Base64.encode64 inserts a line feed after every 60 output characters,
  # which would leave embedded newlines in payloads longer than 45 bytes.
  # strict_encode64 never emits line feeds.
  value = Base64.strict_encode64(data)
  if options[:mode] == :legacy
    { '$binary' => value, '$type' => subtype }
  else
    { '$binary' => { 'base64' => value, 'subType' => subtype } }
  end
end
# Creates a new binary object.
#
# A string in any encoding is accepted. When the string is not already
# BINARY-encoded, a BINARY-encoded copy is stored instead; the bytes are
# unchanged, but applications reading the data back from the Binary must
# not assume it still carries the encoding of the string that was passed
# in.
#
# @example Instantiate a binary.
# BSON::Binary.new(data, :md5)
#
# @param [ String ] data The raw binary data.
# @param [ Symbol ] type The binary type.
#
# @since 2.0.0
def initialize(data = '', type = :generic)
  # All real work lives in initialize_instance so that init_with can
  # share it.
  initialize_instance(data, type)
end
# Rebuilds the object from a coder holding 'data' and 'type' entries.
#
# Exists for legacy deserialization support, where BSON::Binary objects
# are expected to have a specific internal representation (with only
# @type and @data instance variables). The values are run through the
# same initialization path as the constructor.
#
# @param [ #[] ] coder The source of the 'data' and 'type' values.
#
# @api private
def init_with(coder)
  payload = coder['data']
  subtype = coder['type']
  initialize_instance(payload, subtype)
end
# Get a nice string for use with object inspection.
#
# The result contains the object_id, the symbolic type and a hexadecimal
# preview of at most the first eight bytes of the data.
#
# @example Inspect the binary.
# binary.inspect
#
# @return [ String ] The binary in the form
# <BSON::Binary:0xOBJECT_ID type=TYPE data=0xHEX...>
#
# @since 2.3.0
def inspect
  preview = data[0, 8].unpack1('H*')
  "<BSON::Binary:0x#{object_id} type=#{type} data=0x#{preview}...>"
end
# Returns a string representation of the UUID stored in this Binary.
#
# For subtype 4 (:uuid) the UUID is returned in RFC 4122 format; the
# representation may be omitted, and if given it must be :standard.
#
# For subtype 3 (:uuid_old) the representation is mandatory and must be
# one of :csharp_legacy, :java_legacy or :python_legacy. The stored bytes
# are assumed to be in that legacy layout, are converted to the standard
# RFC 4122 byte order, and the UUID is returned in RFC 4122 format.
#
# Any other subtype raises TypeError. The representation must be a
# Symbol; a String is rejected.
#
# @param [ Symbol ] representation How to interpret the UUID.
#
# @return [ String ] The string representation of the UUID.
#
# @raise [ TypeError ] If the subtype of Binary is not :uuid nor :uuid_old.
# @raise [ ArgumentError ] If the representation is given as a String, if
# a representation other than :standard is requested for subtype 4
# (:uuid), if :standard is requested for subtype 3 (:uuid_old), or if an
# invalid representation is requested.
#
# @api experimental
def to_uuid(representation = nil)
  if representation.is_a?(String)
    raise ArgumentError, "Representation must be given as a symbol: #{representation.inspect}"
  end

  kind = type
  return from_uuid_to_uuid(representation || :standard) if kind == :uuid
  return from_uuid_old_to_uuid(representation) if kind == :uuid_old

  raise TypeError, "The type of Binary must be :uuid or :uuid_old, this object is: #{kind.inspect}"
end
# Encode the binary type.
#
# Writes, in order: a 4-byte length, the subtype byte, for the :old
# subtype an additional inner 4-byte length of the data, and then the data
# itself. The leading length is written as a placeholder first and patched
# once everything has been appended.
#
# @example Encode the binary.
# binary.to_bson
#
# @param [ BSON::ByteBuffer ] buffer The buffer to append to.
#
# @return [ BSON::ByteBuffer ] The buffer with the encoded object.
#
# @see http://bsonspec.org/#/specification
#
# @since 2.0.0
def to_bson(buffer = ByteBuffer.new)
  start = buffer.length
  # Placeholder for the length, replaced below.
  buffer.put_int32(0)
  buffer.put_byte(@raw_type)
  buffer.put_int32(data.bytesize) if type == :old
  buffer.put_bytes(data)
  # Exclude the 4-byte length itself and the 1-byte subtype.
  payload_length = buffer.length - start - 5
  buffer.replace_int32(start, payload_length)
end
# Deserialize the binary data from BSON.
#
# Reads a 4-byte length and the subtype byte. A subtype byte below
# USER_SUBTYPE must be a known subtype (present in TYPES); a subtype byte
# in the user-defined range is handed to the constructor unchanged. For
# the :old subtype a second, inner length is read and used instead of the
# outer one.
#
# @param [ ByteBuffer ] buffer The byte buffer.
#
# @option options [ nil | :bson ] :mode Decoding mode to use. Accepted
# for interface compatibility; this method does not read any options.
#
# @return [ Binary ] The decoded binary data.
#
# @raise [ Error::UnsupportedBinarySubtype ] If the subtype byte is below
# USER_SUBTYPE and is not a known subtype.
#
# @see http://bsonspec.org/#/specification
#
# @since 2.0.0
def self.from_bson(buffer, **_options)
  length = buffer.get_int32
  subtype_byte = buffer.get_byte
  type =
    if subtype_byte.bytes.first >= USER_SUBTYPE
      subtype_byte
    else
      TYPES[subtype_byte] || raise(
        Error::UnsupportedBinarySubtype,
        "BSON data contains unsupported binary subtype #{'0x%02x' % subtype_byte.ord}"
      )
    end
  length = buffer.get_int32 if type == :old
  payload = buffer.get_bytes(length)
  new(payload, type)
end
# Creates a BSON::Binary from a string representation of a UUID.
#
# The UUID may be given as 00112233-4455-6677-8899-aabbccddeeff or as
# 00112233445566778899AABBCCDDEEFF: dashes are removed and both upper and
# lower case hexadecimal digits are accepted. The input string is always
# interpreted as being in the RFC 4122 format.
#
# Without a representation (or with :standard) a subtype 4 (:uuid) binary
# is created. With :csharp_legacy, :java_legacy or :python_legacy a
# subtype 3 (:uuid_old) binary is created, with the UUID bytes converted
# to the corresponding legacy MongoDB UUID storage format. The conversion
# is dispatched to the class method named from_<representation>_uuid.
#
# @param [ String ] uuid The string representation of the UUID.
# @param [ Symbol ] representation How to interpret the UUID.
#
# @return [ Binary ] The binary.
#
# @raise [ ArgumentError ] If the representation is given as a String or
# an invalid representation is requested.
#
# @api experimental
def self.from_uuid(uuid, representation = nil)
  if representation.is_a?(String)
    raise ArgumentError, "Representation must be given as a symbol: #{representation}"
  end

  # Turn each pair of hex digits into the byte it denotes.
  uuid_binary = uuid.delete('-').scan(/../).map { |pair| pair.hex.chr }.join
  representation ||= :standard
  handler = :"from_#{representation}_uuid"
  unless respond_to?(handler)
    raise ArgumentError, "Invalid representation: #{representation}"
  end

  send(handler, uuid_binary)
end
# Builds a subtype 4 (:uuid) binary from UUID bytes that are already in
# the standard layout. The bytes are stored as given.
#
# @param [ String ] uuid_binary the UUID data
#
# @return [ BSON::Binary ] the Binary object
#
# @api private
def self.from_standard_uuid(uuid_binary)
  subtype = :uuid
  new(uuid_binary, subtype)
end
# Constructs a new binary object from a csharp legacy-format binary UUID
# representation.
#
# For a 16-byte input the first four bytes, the next two bytes and the
# two bytes after that are each reversed; the final eight bytes are kept
# in place. Input of any other length is stored unchanged.
#
# The reordering works on raw bytes rather than on a regular expression,
# because the regexp metacharacter "." does not match the line feed byte
# (0x0A) and would therefore silently skip the conversion for any UUID
# containing that byte. The argument is not modified.
#
# @param [ String ] uuid_binary the UUID data
#
# @return [ BSON::Binary ] the Binary object
#
# @api private
def self.from_csharp_legacy_uuid(uuid_binary)
  bytes = uuid_binary.bytes
  if bytes.length == 16
    reordered = bytes[0, 4].reverse + bytes[4, 2].reverse + bytes[6, 2].reverse + bytes[8, 8]
    uuid_binary = reordered.pack('C*')
  end
  new(uuid_binary, :uuid_old)
end
# Constructs a new binary object from a java legacy-format binary UUID
# representation.
#
# For a 16-byte input the first eight bytes and the last eight bytes are
# each reversed. Input of any other length is stored unchanged.
#
# The reordering works on raw bytes rather than on a regular expression,
# because the regexp metacharacter "." does not match the line feed byte
# (0x0A) and would therefore silently skip the conversion for any UUID
# containing that byte. The argument is not modified.
#
# @param [ String ] uuid_binary the UUID data
#
# @return [ BSON::Binary ] the Binary object
#
# @api private
def self.from_java_legacy_uuid(uuid_binary)
  bytes = uuid_binary.bytes
  if bytes.length == 16
    uuid_binary = (bytes[0, 8].reverse + bytes[8, 8].reverse).pack('C*')
  end
  new(uuid_binary, :uuid_old)
end
# Builds a subtype 3 (:uuid_old) binary for the python legacy
# representation. No byte reordering is applied: the bytes are stored
# exactly as given.
#
# @param [ String ] uuid_binary the UUID data
#
# @return [ BSON::Binary ] the Binary object
#
# @api private
def self.from_python_legacy_uuid(uuid_binary)
  subtype = :uuid_old
  new(uuid_binary, subtype)
end
private
# Shared initialization used by both the constructor and init_with.
#
# The type is validated first (which also records the raw subtype byte),
# then the data is stored. Data that is not already BINARY-encoded is
# copied and the copy is re-tagged as BINARY, leaving the caller's string
# untouched; data that is already BINARY is stored as-is. Doing this at
# construction time, rather than during serialization, makes it explicit
# that once a string is handed to a Binary it is treated as binary data
# and not as text in any encoding.
#
# @param [ String ] data the data to initialize the object with
# @param [ Symbol ] type the type to assign the binary object
def initialize_instance(data, type)
  @type = validate_type!(type)
  binary = Encoding.find('BINARY')
  @data =
    if data.encoding == binary
      data
    else
      data.dup.force_encoding('BINARY')
    end
end
# Renders the data of a subtype 4 (:uuid) binary as a UUID string.
#
# Only the :standard representation is supported. The data is rendered as
# lowercase hexadecimal and, when that yields exactly 32 digits, dashes
# are inserted in the 8-4-4-4-12 pattern.
#
# @param [ Symbol ] representation The representation to target (must be
# :standard)
#
# @return [ String ] the UUID as a string
#
# @raise [ ArgumentError ] If the representation is not :standard.
def from_uuid_to_uuid(representation)
  unless representation == :standard
    raise ArgumentError,
          'Binary of type :uuid can only be stringified to :standard representation, ' \
          "requested: #{representation.inspect}"
  end

  hex = data.each_char.map { |ch| format('%02x', ch.ord) }.join
  hex.sub(/\A(.{8})(.{4})(.{4})(.{4})(.{12})\z/, '\1-\2-\3-\4-\5')
end
# Renders the data of a subtype 3 (:uuid_old) binary as a UUID string.
#
# The data is rendered as lowercase hexadecimal and passed to the private
# method named from_uuid_old_to_<representation>_uuid, which undoes the
# legacy byte ordering. When the result has exactly 32 digits, dashes are
# inserted in the 8-4-4-4-12 pattern.
#
# @param [ Symbol ] representation The representation to target
#
# @return [ String ] the UUID as a string
#
# @raise [ ArgumentError ] If the representation is nil or no converter
# exists for it.
def from_uuid_old_to_uuid(representation)
  if representation.nil?
    raise ArgumentError, 'Representation must be specified for BSON::Binary objects of type :uuid_old'
  end

  hex = data.each_char.map { |ch| format('%02x', ch.ord) }.join
  handler = :"from_uuid_old_to_#{representation}_uuid"
  # The converters are private, hence include_all = true.
  unless respond_to?(handler, true)
    raise ArgumentError, "Invalid representation: #{representation}"
  end

  converted = send(handler, hex)
  converted.sub(/\A(.{8})(.{4})(.{4})(.{4})(.{12})\z/, '\1-\2-\3-\4-\5')
end
# Converter slot for the :standard representation of a subtype 3
# (:uuid_old) binary. That combination is not supported, so this always
# raises; it exists so the request is rejected with a specific message.
#
# @param [ String ] _hex The hexadecimal string to convert (unused)
#
# @raise [ ArgumentError ] because standard representation is not supported
def from_uuid_old_to_standard_uuid(_hex)
  message = 'BSON::Binary objects of type :uuid_old cannot be stringified to :standard representation'
  raise ArgumentError, message
end
# Undoes the csharp-legacy byte ordering on a hexadecimal UUID string.
#
# For a 32-digit input, the first four bytes, the next two bytes and the
# two bytes after that are each reversed (working on two-digit pairs); the
# final sixteen digits are kept in place. Any other input is returned
# unchanged.
#
# @param [ String ] hex The hexadecimal string to convert
#
# @return [ String ] the csharp-legacy-formatted UUID
def from_uuid_old_to_csharp_legacy_uuid(hex)
  byte_order = '\4\3\2\1\6\5\8\7\9'
  hex.sub(/\A(..)(..)(..)(..)(..)(..)(..)(..)(.{16})\z/, byte_order)
end
# Undoes the java-legacy byte ordering on a hexadecimal UUID string.
#
# For a 32-digit input, the first eight bytes and the last eight bytes are
# each reversed (working on two-digit pairs). Any other input is returned
# unchanged.
#
# @param [ String ] hex The hexadecimal string to convert
#
# @return [ String ] the java-legacy-formatted UUID
def from_uuid_old_to_java_legacy_uuid(hex)
  hex.sub(/\A(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)(..)\z/) do
    octets = ::Regexp.last_match.captures
    upper_half = octets.first(8).reverse
    lower_half = octets.last(8).reverse
    (upper_half + lower_half).join
  end
end
# Converter for the python-legacy representation. No reordering is
# applied: the hexadecimal string is returned as given.
#
# @param [ String ] hex The hexadecimal string to convert
#
# @return [ String ] the python-legacy-formatted UUID
def from_uuid_old_to_python_legacy_uuid(hex)
  # Identity conversion: the very same string is handed back.
  hex
end
# Validate the provided type is a valid type.
#
# Accepted forms:
# - Integer: treated as the numeric subtype.
# - Symbol: treated as a subtype name.
# - String of exactly one byte: treated as the raw subtype byte.
# - String of more than one byte: treated as a subtype name.
#
# An empty String, or any other kind of object, is rejected with
# InvalidBinaryType.
#
# @api private
#
# @example Validate the type.
# binary.validate_type!(:user)
#
# @param [ Symbol | String | Integer ] type The provided type.
#
# @return [ Symbol ] the symbolic type corresponding to the argument.
#
# @raise [ BSON::Error::InvalidBinaryType ] If the type is invalid.
#
# @since 2.0.0
def validate_type!(type)
  case type
  when Integer
    validate_integer_type!(type)
  when String
    # An empty string carries neither a name nor a subtype byte; without
    # this guard it would surface as a NoMethodError on nil further down.
    raise BSON::Error::InvalidBinaryType, type if type.empty?

    # Decide by byte size rather than character count so that a single
    # multi-byte character is not silently truncated to its first byte.
    if type.bytesize > 1
      validate_symbol_type!(type.to_sym)
    else
      validate_integer_type!(type.bytes.first)
    end
  when Symbol
    validate_symbol_type!(type)
  else
    raise BSON::Error::InvalidBinaryType, type
  end
end
# Test that the given integer type is valid, and record its raw byte.
#
# The value must be an Integer that fits in a single byte (0..255).
# Values below USER_SUBTYPE must be known subtypes (present in TYPES);
# every value at or above USER_SUBTYPE is accepted as a user-defined
# subtype. On success @raw_type is set to the frozen, BINARY-encoded
# one-byte string for the value.
#
# @param [ Integer ] type the provided type
#
# @return [ Symbol ] the symbolic type corresponding to the argument
# (:user for every user-defined subtype).
#
# @raise [ BSON::Error::InvalidBinaryType] if the type is invalid.
def validate_integer_type!(type)
  # Integer#chr raises RangeError outside 0..255 and nil has no #chr at
  # all; report both as the documented error instead.
  unless type.is_a?(Integer) && type.between?(0, 255)
    raise BSON::Error::InvalidBinaryType, type
  end

  raw = type.chr.force_encoding('BINARY').freeze
  symbolic =
    if type >= USER_SUBTYPE
      :user
    else
      TYPES.fetch(raw) { raise BSON::Error::InvalidBinaryType, type }
    end
  @raw_type = raw
  symbolic
end
# Test that the given symbol type is valid, and record its raw byte.
#
# The symbol must be a key of SUBTYPES. On success @raw_type is set to a
# frozen, BINARY-encoded copy of the corresponding subtype string. A copy
# is stored (rather than the string held by SUBTYPES itself) so that the
# value exposed through raw_type cannot be used to mutate the shared
# table, and so that it has the same encoding and frozen state as the
# value recorded for integer types.
#
# @param [ Symbol ] type the provided type
#
# @return [ Symbol ] the symbolic type corresponding to the argument.
#
# @raise [ BSON::Error::InvalidBinaryType] if the type is invalid.
def validate_symbol_type!(type)
  raise BSON::Error::InvalidBinaryType, type unless SUBTYPES.key?(type)

  @raw_type = SUBTYPES[type].b.freeze
  type
end
# Register this class with the Registry under BSON_TYPE. This statement
# sits directly in the class body, so it runs when the class definition is
# evaluated (i.e. when this file is loaded).
#
# @since 2.0.0
Registry.register(BSON_TYPE, self)
end
end