ext/bson/read.c
/*
* Copyright (C) 2009-2020 MongoDB Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "bson-native.h"
#include <ruby/encoding.h>
/*
 * Forward declarations of the file-private (static) helpers that are defined
 * further down in this file, so they can be used before their definitions.
 */
static void pvt_raise_decode_error(volatile VALUE msg);
static int32_t pvt_validate_length(byte_buffer_t *b);
static uint8_t pvt_get_type_byte(byte_buffer_t *b);
static VALUE pvt_get_int32(byte_buffer_t *b);
static VALUE pvt_get_uint32(byte_buffer_t *b);
static VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv);
static VALUE pvt_get_double(byte_buffer_t *b);
static VALUE pvt_get_string(byte_buffer_t *b, const char *data_type);
static VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv);
static VALUE pvt_get_boolean(byte_buffer_t *b);
static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv);
static void pvt_skip_cstring(byte_buffer_t *b);
static size_t pvt_strnlen(const byte_buffer_t *b);
void pvt_raise_decode_error(volatile VALUE msg) {
VALUE klass = pvt_const_get_3("BSON", "Error", "BSONDecodeError");
rb_exc_raise(rb_exc_new_str(klass, msg));
}
/**
 * Validates and consumes the length prefix of the array / hash that starts at
 * the current read position.
 *
 * The 4-byte little-endian length must be at least 5 (4 length bytes + 1
 * terminating byte), the buffer must hold that many bytes starting at the
 * current read position, and the last of those bytes must be a null byte.
 * On success the read position is advanced past the 4 length bytes.
 *
 * Raises RangeError when the length is smaller than 5 or when the final byte
 * is not null. ENSURE_BSON_READ performs the "enough bytes available" checks.
 *
 * Returns the declared length in bytes (the length prefix itself included).
 */
int32_t pvt_validate_length(byte_buffer_t *b)
{
  int32_t length;
  size_t terminator_offset;

  ENSURE_BSON_READ(b, 4);
  memcpy(&length, READ_PTR(b), 4);
  length = BSON_UINT32_FROM_LE(length);

  /* minimum valid length is 4 (byte count) + 1 (terminating byte) */
  if (length < 5) {
    rb_raise(rb_eRangeError, "Buffer contained invalid length %d at %zu", length, b->read_position);
  }

  ENSURE_BSON_READ(b, length);

  /*
   * The terminator is the last byte covered by the length, i.e. it lives at
   * offset length - 1. Only that offset has been bounds-checked above, so
   * the error report must read the same byte and not the one after it.
   */
  terminator_offset = (size_t)length - 1;
  if (*(READ_PTR(b) + terminator_offset) != 0) {
    rb_raise(rb_eRangeError, "Buffer should have contained null terminator at %zu but contained %d",
             b->read_position + terminator_offset, (int)*(READ_PTR(b) + terminator_offset));
  }

  b->read_position += 4;
  return length;
}
/**
 * Decodes the value of a single hash / array element whose type byte has
 * already been read by the caller.
 *
 * Int32, Int64, Double, String, Symbol, Array, Document and Boolean values
 * are decoded by the native readers in this file. For every other type the
 * class registered for that type is fetched from the BSON registry and its
 * from_bson method is called with the Ruby buffer, plus the options hash when
 * exactly one extra argument was supplied.
 *
 * Raises ArgumentError when more than one extra argument was supplied;
 * Check_Type rejects an extra argument that is not a Hash.
 */
VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv)
{
  VALUE handler;
  VALUE decoded;

  if (type == BSON_TYPE_INT32) {
    return pvt_get_int32(b);
  }
  if (type == BSON_TYPE_INT64) {
    return pvt_get_int64(b, argc, argv);
  }
  if (type == BSON_TYPE_DOUBLE) {
    return pvt_get_double(b);
  }
  if (type == BSON_TYPE_STRING) {
    return pvt_get_string(b, "String");
  }
  if (type == BSON_TYPE_SYMBOL) {
    return pvt_get_symbol(b, rb_buffer, argc, argv);
  }
  if (type == BSON_TYPE_ARRAY) {
    return rb_bson_byte_buffer_get_array(argc, argv, rb_buffer);
  }
  if (type == BSON_TYPE_DOCUMENT) {
    return rb_bson_byte_buffer_get_hash(argc, argv, rb_buffer);
  }
  if (type == BSON_TYPE_BOOLEAN) {
    return pvt_get_boolean(b);
  }

  /* Not a natively decoded type: delegate to the class registered for it. */
  handler = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(type));

  if (argc > 1) {
    rb_raise(rb_eArgError, "At most one argument is allowed");
  }

  if (argc > 0) {
    VALUE forwarded[2];

    Check_Type(argv[0], T_HASH);
    forwarded[0] = rb_buffer;
    forwarded[1] = argv[0];
#ifdef RB_PASS_KEYWORDS /* Ruby 2.7+ */
    decoded = rb_funcallv_kw(handler, rb_intern("from_bson"), 2, forwarded, RB_PASS_KEYWORDS);
#else /* Ruby 2.6 and below */
    decoded = rb_funcallv(handler, rb_intern("from_bson"), 2, forwarded);
#endif
  } else {
    decoded = rb_funcall(handler, rb_intern("from_bson"), 1, rb_buffer);
  }

  RB_GC_GUARD(handler);
  return decoded;
}
/**
 * Reads the next byte of the buffer and returns it as a one-byte Ruby
 * string. The read position is advanced by one.
 */
VALUE rb_bson_byte_buffer_get_byte(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE single;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  ENSURE_BSON_READ(buffer, 1);

  single = rb_str_new(READ_PTR(buffer), 1);
  buffer->read_position++;

  return single;
}
/**
 * Consumes one byte from the buffer and returns it as an unsigned value.
 * The hash and array readers use it to fetch the type byte of each element.
 */
uint8_t pvt_get_type_byte(byte_buffer_t *b){
  uint8_t type_code;

  ENSURE_BSON_READ(b, 1);
  type_code = (uint8_t)*READ_PTR(b);
  b->read_position++;

  return type_code;
}
/**
 * Reads the requested number of bytes from the buffer and returns them as a
 * Ruby string, advancing the read position past them.
 *
 * i is the byte count as a Ruby Integer. It is converted with NUM2LONG, which
 * type-checks its argument, rather than FIX2LONG, which blindly reinterprets
 * whatever VALUE it is handed.
 *
 * Raises RangeError when the count is negative or does not fit in 32 bits,
 * instead of letting it wrap or truncate when narrowed to uint32_t.
 * ENSURE_BSON_READ checks that the buffer actually holds that many bytes.
 */
VALUE rb_bson_byte_buffer_get_bytes(VALUE self, VALUE i)
{
  byte_buffer_t *b;
  VALUE bytes;
  uint32_t length;
  const long requested = NUM2LONG(i);

  if (requested < 0 || (unsigned long)requested > UINT32_MAX) {
    rb_raise(rb_eRangeError, "Invalid number of bytes to read: %ld", requested);
  }
  length = (uint32_t)requested;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
  ENSURE_BSON_READ(b, length);
  bytes = rb_str_new(READ_PTR(b), length);
  b->read_position += length;
  return bytes;
}
/**
 * Decodes a boolean from the buffer: a single byte that must be 1 (true) or
 * 0 (false). Any other value raises a decode error. The byte is consumed
 * once it has been accepted.
 */
VALUE pvt_get_boolean(byte_buffer_t *b){
  VALUE flag = Qnil;
  char raw;

  ENSURE_BSON_READ(b, 1);
  raw = *READ_PTR(b);

  if (raw == 1) {
    flag = Qtrue;
  } else if (raw == 0) {
    flag = Qfalse;
  } else {
    pvt_raise_decode_error(rb_sprintf("Invalid boolean byte value: %d", (int) raw));
  }

  b->read_position += 1;
  return flag;
}
/**
 * Ruby-facing string reader: unwraps the native buffer from self and
 * delegates to pvt_get_string, labelling the data type as "String".
 */
VALUE rb_bson_byte_buffer_get_string(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE decoded;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  decoded = pvt_get_string(buffer, "String");

  return decoded;
}
/**
 * Decodes a length-prefixed string at the current read position.
 *
 * Layout: a 4-byte little-endian length, followed by that many bytes, the
 * last of which must be a null terminator. The payload (without the
 * terminator) is passed to rb_bson_utf8_validate together with data_type and
 * then returned as a UTF-8 encoded Ruby string. The read position is advanced
 * past the prefix and the payload.
 *
 * data_type is a label forwarded to rb_bson_utf8_validate ("String" and
 * "Symbol" are the values used by callers in this file).
 *
 * Raises a decode error when the length is negative or zero, or when the
 * last byte is not null. ENSURE_BSON_READ checks that the buffer holds the
 * bytes being read.
 */
VALUE pvt_get_string(byte_buffer_t *b, const char *data_type)
{
  int32_t length_le;
  int32_t length;
  size_t total_size;
  char *str_ptr;
  VALUE string;
  unsigned char last_byte;

  ENSURE_BSON_READ(b, 4);
  memcpy(&length_le, READ_PTR(b), 4);
  length = BSON_UINT32_FROM_LE(length_le);
  if (length < 0) {
    pvt_raise_decode_error(rb_sprintf("String length is negative: %d", length));
  }
  if (length == 0) {
    pvt_raise_decode_error(rb_str_new_cstr("String length is zero but string must be null-terminated"));
  }

  /*
   * Add the prefix size in size_t: length comes from untrusted input and
   * "4 + length" evaluated as a 32-bit signed int overflows for values close
   * to INT32_MAX.
   */
  total_size = (size_t)length + 4;
  ENSURE_BSON_READ(b, total_size);

  str_ptr = READ_PTR(b) + 4;
  last_byte = (unsigned char)str_ptr[length - 1];
  if (last_byte != 0) {
    pvt_raise_decode_error(rb_sprintf("Last byte of the string is not null: 0x%x", (int) last_byte));
  }
  rb_bson_utf8_validate(str_ptr, length - 1, true, data_type);
  string = rb_enc_str_new(str_ptr, length - 1, rb_utf8_encoding());
  b->read_position += total_size;
  return string;
}
/**
 * Decodes a BSON symbol from the byte buffer.
 *
 * When the argc/argv options select BSON mode (:mode => :bson), the UTF-8
 * string is read natively and wrapped in a BSON::Symbol::Raw. In every other
 * mode the class registered for the symbol type is fetched from the BSON
 * registry and its from_bson method is called with the Ruby buffer (String
 * in bson-ruby, overridden to Symbol by the Ruby driver).
 *
 * Returns a BSON::Symbol::Raw, Symbol or String value accordingly.
 */
VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv)
{
  VALUE target_class;
  VALUE result;

  if (pvt_get_mode_option(argc, argv) != BSON_MODE_BSON) {
    target_class = rb_funcall(rb_bson_registry, rb_intern("get"), 1, INT2FIX(BSON_TYPE_SYMBOL));
    result = rb_funcall(target_class, rb_intern("from_bson"), 1, rb_buffer);
  } else {
    VALUE text = pvt_get_string(b, "Symbol");

    target_class = pvt_const_get_3("BSON", "Symbol", "Raw");
    result = rb_funcall(target_class, rb_intern("new"), 1, text);
  }

  RB_GC_GUARD(target_class);
  return result;
}
/**
 * Reads a null-terminated cstring from the buffer and returns it as a UTF-8
 * encoded Ruby string (terminator excluded). The read position is advanced
 * past the string and its terminator.
 *
 * pvt_strnlen raises RangeError when no terminator is found in the readable
 * part of the buffer.
 */
VALUE rb_bson_byte_buffer_get_cstring(VALUE self)
{
  byte_buffer_t *b;
  VALUE string;
  size_t length;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);
  /* Keep the length in size_t; narrowing it to int would corrupt large values. */
  length = pvt_strnlen(b);
  /* The terminator is consumed as well, so it is part of the bounds check. */
  ENSURE_BSON_READ(b, length + 1);
  string = rb_enc_str_new(READ_PTR(b), (long)length, rb_utf8_encoding());
  b->read_position += length + 1;
  return string;
}
/**
 * Advances the read position past a null-terminated cstring (terminator
 * included) without building a Ruby string for it.
 *
 * pvt_strnlen raises RangeError when no terminator is found in the readable
 * part of the buffer.
 */
void pvt_skip_cstring(byte_buffer_t *b)
{
  size_t length;

  /* Keep the length in size_t; narrowing it to int would corrupt large values. */
  length = pvt_strnlen(b);
  /* The terminator is skipped as well, so it is part of the bounds check. */
  ENSURE_BSON_READ(b, length + 1);
  b->read_position += length + 1;
}
/**
 * Ruby-facing int32 reader: unwraps the native buffer from self and
 * delegates to pvt_get_int32.
 */
VALUE rb_bson_byte_buffer_get_int32(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE number;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  number = pvt_get_int32(buffer);

  return number;
}
/**
 * Consumes 4 bytes from the buffer, interprets them as a little-endian
 * signed 32-bit integer and returns the value as a Ruby Integer.
 */
VALUE pvt_get_int32(byte_buffer_t *b)
{
  int32_t raw;

  ENSURE_BSON_READ(b, 4);

  /* memcpy avoids an unaligned load straight from the buffer. */
  memcpy(&raw, READ_PTR(b), 4);
  b->read_position += 4;

  return INT2NUM(BSON_UINT32_FROM_LE(raw));
}
/**
 * Ruby-facing unsigned int32 reader: unwraps the native buffer from self
 * and delegates to pvt_get_uint32.
 */
VALUE rb_bson_byte_buffer_get_uint32(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE number;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  number = pvt_get_uint32(buffer);

  return number;
}
/**
 * Consumes 4 bytes from the buffer, interprets them as a little-endian
 * unsigned 32-bit integer and returns the value as a Ruby Integer.
 */
VALUE pvt_get_uint32(byte_buffer_t *b)
{
  uint32_t raw;

  ENSURE_BSON_READ(b, 4);

  /* memcpy avoids an unaligned load straight from the buffer. */
  memcpy(&raw, READ_PTR(b), 4);
  b->read_position += 4;

  return UINT2NUM(BSON_UINT32_FROM_LE(raw));
}
/**
 * Ruby-facing int64 reader: unwraps the native buffer from self and
 * delegates to pvt_get_int64 without any options (argc 0, argv NULL).
 */
VALUE rb_bson_byte_buffer_get_int64(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE number;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  number = pvt_get_int64(buffer, 0, NULL);

  return number;
}
/**
 * Reads a 64-bit little-endian integer out of the byte buffer into a Ruby
 * Integer instance and advances the read position by 8 bytes.
 *
 * If the argc/argv arguments have a :mode option with the value of :bson,
 * the integer is wrapped in an instance of the class registered for the
 * int64 type in the BSON registry (BSON::Int64).
 *
 * Returns either the Integer or the wrapping instance.
 */
VALUE pvt_get_int64(byte_buffer_t *b, int argc, VALUE *argv)
{
  int64_t i64;
  VALUE num;
  VALUE result;

  ENSURE_BSON_READ(b, 8);
  memcpy(&i64, READ_PTR(b), 8);
  b->read_position += 8;
  num = LL2NUM(BSON_UINT64_FROM_LE(i64));

  if (pvt_get_mode_option(argc, argv) == BSON_MODE_BSON) {
    VALUE klass = rb_funcall(rb_bson_registry,rb_intern("get"),1, INT2FIX(BSON_TYPE_INT64));
    result = rb_funcall(klass, rb_intern("new"), 1, num);
    RB_GC_GUARD(klass);
  } else {
    result = num;
  }

  /*
   * Single exit point so that the guard on num is actually reached; placed
   * after both branches returned it would be dead code.
   */
  RB_GC_GUARD(num);
  return result;
}
/**
 * Ruby-facing double reader: unwraps the native buffer from self and
 * delegates to pvt_get_double.
 */
VALUE rb_bson_byte_buffer_get_double(VALUE self)
{
  byte_buffer_t *buffer;
  VALUE number;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  number = pvt_get_double(buffer);

  return number;
}
/**
 * Consumes 8 bytes from the buffer, converts them from little-endian with
 * BSON_DOUBLE_FROM_LE and returns the result as a Ruby Float.
 */
VALUE pvt_get_double(byte_buffer_t *b)
{
  double raw;

  ENSURE_BSON_READ(b, 8);

  /* memcpy avoids an unaligned load straight from the buffer. */
  memcpy(&raw, READ_PTR(b), 8);
  b->read_position += 8;

  return DBL2NUM(BSON_DOUBLE_FROM_LE(raw));
}
/**
 * Reads the 16 raw bytes of a decimal128 value from the buffer and returns
 * them as a Ruby string, without interpreting them. The read position is
 * advanced by 16.
 */
VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self)
{
  enum { DECIMAL128_SIZE = 16 };
  byte_buffer_t *buffer;
  VALUE raw;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, buffer);
  ENSURE_BSON_READ(buffer, DECIMAL128_SIZE);

  raw = rb_str_new(READ_PTR(buffer), DECIMAL128_SIZE);
  buffer->read_position += DECIMAL128_SIZE;

  return raw;
}
/**
 * Tells whether a decoded document has the shape of a DBRef. This mirrors
 * the validation performed by the DBRef constructor.
 *
 * A document qualifies when the value stored under _ref_str is a String,
 * the value under _id_str is not nil, and the value under _db_str is either
 * nil or a String.
 *
 * Returns 1 when the document qualifies, 0 otherwise.
 */
static int pvt_is_dbref(VALUE doc) {
  VALUE collection;
  VALUE identifier;
  VALUE database;

  /* nil is not a T_STRING, so this single test also rejects a missing value. */
  collection = rb_hash_aref(doc, _ref_str);
  if (!RB_TYPE_P(collection, T_STRING)) {
    return 0;
  }

  identifier = rb_hash_aref(doc, _id_str);
  if (NIL_P(identifier)) {
    return 0;
  }

  /* The database entry is optional, but must be a String when present. */
  database = rb_hash_aref(doc, _db_str);
  return (NIL_P(database) || RB_TYPE_P(database, T_STRING)) ? 1 : 0;
}
/**
 * Decodes the document at the current read position into a BSON::Document.
 *
 * The length prefix is validated first, then elements are read until a zero
 * type byte is met: each element is a type byte, a cstring key and a value
 * decoded by pvt_read_field (argc/argv are forwarded to it unchanged).
 *
 * Raises a decode error when the number of bytes consumed differs from the
 * declared document length.
 *
 * Returns the document, or a BSON::DBRef built from it when the document
 * satisfies pvt_is_dbref.
 */
VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){
  VALUE doc = Qnil;
  byte_buffer_t *b = NULL;
  uint8_t type;
  VALUE cDocument = pvt_const_get_2("BSON", "Document");
  int32_t length;
  char *start_ptr;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);

  /* Remember where the document starts so the consumed size can be checked. */
  start_ptr = READ_PTR(b);
  length = pvt_validate_length(b);

  doc = rb_funcall(cDocument, rb_intern("allocate"), 0);

  while((type = pvt_get_type_byte(b)) != 0){
    VALUE field = rb_bson_byte_buffer_get_cstring(self);
    rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv));
    RB_GC_GUARD(field);
  }

  if (READ_PTR(b) - start_ptr != length) {
    /* The pointer difference is a ptrdiff_t; cast it to match the %ld conversion. */
    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, (long)(READ_PTR(b) - start_ptr)));
  }

  if (pvt_is_dbref(doc)) {
    VALUE cDBRef = pvt_const_get_2("BSON", "DBRef");
    doc = rb_funcall(cDBRef, rb_intern("new"), 1, doc);
  }

  return doc;
}
/**
 * Decodes the array at the current read position into a Ruby Array.
 *
 * The length prefix is validated first, then elements are read until a zero
 * type byte is met: each element is a type byte, a cstring key (skipped,
 * since elements are appended in the order they are read) and a value decoded
 * by pvt_read_field (argc/argv are forwarded to it unchanged).
 *
 * Raises a decode error when the number of bytes consumed differs from the
 * declared array length.
 */
VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){
  byte_buffer_t *b;
  VALUE array = Qnil;
  uint8_t type;
  int32_t length;
  char *start_ptr;

  TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b);

  /* Remember where the array starts so the consumed size can be checked. */
  start_ptr = READ_PTR(b);
  length = pvt_validate_length(b);

  array = rb_ary_new();
  while((type = pvt_get_type_byte(b)) != 0){
    pvt_skip_cstring(b);
    rb_ary_push(array, pvt_read_field(b, self, type, argc, argv));
  }
  RB_GC_GUARD(array);

  if (READ_PTR(b) - start_ptr != length) {
    /* The pointer difference is a ptrdiff_t; cast it to match the %ld conversion. */
    pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the array but read %ld bytes", length, (long)(READ_PTR(b) - start_ptr)));
  }

  return array;
}
/**
 * Returns the number of bytes between the current read position of the
 * buffer and the next null byte, the null byte itself not being counted.
 * Only the READ_SIZE(b) bytes starting at READ_PTR(b) are scanned.
 *
 * Raises RangeError when no null byte is found within that range.
 */
size_t pvt_strnlen(const byte_buffer_t *b) {
  const char *begin = READ_PTR(b);
  const char *terminator = memchr(begin, '\0', READ_SIZE(b));

  if (terminator == NULL) {
    rb_raise(rb_eRangeError, "string is too long (possibly not null-terminated)");
  }

  return (size_t)(terminator - begin);
}