SciRuby/nmatrix

View on GitHub
ext/nmatrix/storage/dense/dense.cpp

Summary

Maintainability
Test Coverage
/////////////////////////////////////////////////////////////////////
// = NMatrix
//
// A linear algebra library for scientific computation in Ruby.
// NMatrix is part of SciRuby.
//
// NMatrix was originally inspired by and derived from NArray, by
// Masahiro Tanaka: http://narray.rubyforge.org
//
// == Copyright Information
//
// SciRuby is Copyright (c) 2010 - 2014, Ruby Science Foundation
// NMatrix is Copyright (c) 2012 - 2014, John Woods and the Ruby Science Foundation
//
// Please see LICENSE.txt for additional copyright notices.
//
// == Contributing
//
// By contributing source code to SciRuby, you agree to be bound by
// our Contributor Agreement:
//
// * https://github.com/SciRuby/sciruby/wiki/Contributor-Agreement
//
// == dense.c
//
// Dense n-dimensional matrix storage.

/*
 * Standard Includes
 */

#include <ruby.h>

/*
 * Project Includes
 */
#include "../../data/data.h"
#include "../../math/long_dtype.h"
#include "../../math/gemm.h"
#include "../../math/gemv.h"
#include "../../math/math.h"
#include "../common.h"
#include "dense.h"

/*
 * Macros
 */

/*
 * Global Variables
 */

/*
 * Forward Declarations
 */

namespace nm { namespace dense_storage {

  template<typename LDType, typename RDType>
  void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);

  template <typename LDType, typename RDType>
  DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);

  template <typename LDType, typename RDType>
  bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right);

  template <typename DType>
  static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);

  template <typename DType>
  bool is_hermitian(const DENSE_STORAGE* mat, int lda);

  template <typename DType>
  bool is_symmetric(const DENSE_STORAGE* mat, int lda);


  /*
   * Recursive slicing for N-dimensional matrix.
   */
  template <typename LDType, typename RDType>
  static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
    if (src->dim - n > 1) {
      for (size_t i = 0; i < lengths[n]; ++i) {
        slice_copy<LDType,RDType>(dest, src, lengths,
                   pdest + dest->stride[n]*i,
                   psrc + src->stride[n]*i,
                   n + 1);
      }
    } else {
      for (size_t p = 0; p < dest->shape[n]; ++p) {
        reinterpret_cast<LDType*>(dest->elements)[p+pdest] = reinterpret_cast<RDType*>(src->elements)[p+psrc];
      }
      /*memcpy((char*)dest->elements + pdest*DTYPE_SIZES[dest->dtype],
          (char*)src->elements + psrc*DTYPE_SIZES[src->dtype],
          dest->shape[n]*DTYPE_SIZES[dest->dtype]); */
    }

  }

  /*
   * Recursive function, sets multiple values in a matrix from a single source value. Same basic pattern as slice_copy.
   */
  template <typename D>
  static void slice_set(DENSE_STORAGE* dest, size_t* lengths, size_t pdest, size_t rank, D* const v, size_t v_size, size_t& v_offset) {
    if (dest->dim - rank > 1) {
      for (size_t i = 0; i < lengths[rank]; ++i) {
        slice_set<D>(dest, lengths, pdest + dest->stride[rank] * i, rank + 1, v, v_size, v_offset);
      }
    } else {
      for (size_t p = 0; p < lengths[rank]; ++p, ++v_offset) {
        if (v_offset >= v_size) v_offset %= v_size;

        D* elem = reinterpret_cast<D*>(dest->elements);
        elem[p + pdest] = v[v_offset];
      }
    }
  }


  /*
   * Dense storage set/slice-set function, templated version.
   */
  template <typename D>
  void set(VALUE left, SLICE* slice, VALUE right) {
    NM_CONSERVATIVE(nm_register_value(&left));
    NM_CONSERVATIVE(nm_register_value(&right));

    DENSE_STORAGE* s = NM_STORAGE_DENSE(left);

    std::pair<NMATRIX*,bool> nm_and_free =
      interpret_arg_as_dense_nmatrix(right, s->dtype);

    // Map the data onto D* v.
    D*     v;
    size_t v_size = 1;

    if (nm_and_free.first) {
      DENSE_STORAGE* t = reinterpret_cast<DENSE_STORAGE*>(nm_and_free.first->storage);
      v                = reinterpret_cast<D*>(t->elements);
      v_size           = nm_storage_count_max_elements(t);

    } else if (RB_TYPE_P(right, T_ARRAY)) {
      
      v_size = RARRAY_LEN(right);
      v      = NM_ALLOC_N(D, v_size);
      if (s->dtype == nm::RUBYOBJ)
        nm_register_values(reinterpret_cast<VALUE*>(v), v_size);

      for (size_t m = 0; m < v_size; ++m) {
        rubyval_to_cval(rb_ary_entry(right, m), s->dtype, &(v[m]));
      }

    } else {
      v = reinterpret_cast<D*>(rubyobj_to_cval(right, NM_DTYPE(left)));
      if (s->dtype == nm::RUBYOBJ)
        nm_register_values(reinterpret_cast<VALUE*>(v), v_size);
    }

    if (slice->single) {
      reinterpret_cast<D*>(s->elements)[nm_dense_storage_pos(s, slice->coords)] = *v;
    } else {
      size_t v_offset = 0;
      slice_set(s, slice->lengths, nm_dense_storage_pos(s, slice->coords), 0, v, v_size, v_offset);
    }

    // Only free v if it was allocated in this function.
    if (nm_and_free.first) {
      if (nm_and_free.second) {
        nm_delete(nm_and_free.first);
      }
    } else {
      if (s->dtype == nm::RUBYOBJ)
        nm_unregister_values(reinterpret_cast<VALUE*>(v), v_size);
      NM_FREE(v);
    }
    NM_CONSERVATIVE(nm_unregister_value(&left));
    NM_CONSERVATIVE(nm_unregister_value(&right));

  }

}} // end of namespace nm::dense_storage


extern "C" {

static size_t* stride(size_t* shape, size_t dim);
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n);

/*
 * Functions
 */

///////////////
// Lifecycle //
///////////////


/*
 * This creates a dummy with all the properties of dense storage, but no actual elements allocation.
 *
 * elements will be NULL when this function finishes. You can clean up with nm_dense_storage_delete, which will
 * check for that NULL pointer before freeing elements.
 */
static DENSE_STORAGE* nm_dense_storage_create_dummy(nm::dtype_t dtype, size_t* shape, size_t dim) {
  DENSE_STORAGE* s = NM_ALLOC( DENSE_STORAGE );

  s->dim        = dim;
  s->shape      = shape;
  s->dtype      = dtype;

  s->offset     = NM_ALLOC_N(size_t, dim);
  memset(s->offset, 0, sizeof(size_t)*dim);

  s->stride     = stride(shape, dim);
  s->count      = 1;
  s->src        = s;

  s->elements   = NULL;

  return s;
}


/*
 * Note that elements and elements_length are for initial value(s) passed in.
 * If they are the correct length, they will be used directly. If not, they
 * will be concatenated over and over again into a new elements array. If
 * elements is NULL, the new elements array will not be initialized.
 */
DENSE_STORAGE* nm_dense_storage_create(nm::dtype_t dtype, size_t* shape, size_t dim, void* elements, size_t elements_length) {
  if (dtype == nm::RUBYOBJ)
    nm_register_values(reinterpret_cast<VALUE*>(elements), elements_length);

  DENSE_STORAGE* s = nm_dense_storage_create_dummy(dtype, shape, dim);
  size_t count  = nm_storage_count_max_elements(s);

  if (elements_length == count) {
    s->elements = elements;
    
    if (dtype == nm::RUBYOBJ)
      nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);

  } else {

    s->elements = NM_ALLOC_N(char, DTYPE_SIZES[dtype]*count);

    if (dtype == nm::RUBYOBJ)
      nm_unregister_values(reinterpret_cast<VALUE*>(elements), elements_length);

    size_t copy_length = elements_length;

    if (elements_length > 0) {
      // Repeat elements over and over again until the end of the matrix.
      for (size_t i = 0; i < count; i += elements_length) {

        if (i + elements_length > count) {
          copy_length = count - i;
        }

        memcpy((char*)(s->elements)+i*DTYPE_SIZES[dtype], (char*)(elements)+(i % elements_length)*DTYPE_SIZES[dtype], copy_length*DTYPE_SIZES[dtype]);
      }

      // Get rid of the init_val.
      NM_FREE(elements);
    }
  }

  return s;
}


/*
 * Destructor for dense storage. Make sure when you update this you also update nm_dense_storage_delete_dummy.
 */
void nm_dense_storage_delete(STORAGE* s) {
  // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
  if (s) {
    DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
    if(storage->count-- == 1) {
      NM_FREE(storage->shape);
      NM_FREE(storage->offset);
      NM_FREE(storage->stride);
      if (storage->elements != NULL) {// happens with dummy objects
        NM_FREE(storage->elements);
      }
      NM_FREE(storage);
    }
  }
}

/*
 * Destructor for dense storage references (slicing).
 */
void nm_dense_storage_delete_ref(STORAGE* s) {
  // Sometimes Ruby passes in NULL storage for some reason (probably on copy construction failure).
  if (s) {
    DENSE_STORAGE* storage = (DENSE_STORAGE*)s;
    nm_dense_storage_delete( reinterpret_cast<STORAGE*>(storage->src) );
    NM_FREE(storage->shape);
    NM_FREE(storage->offset);
    NM_FREE(storage);
  }
}

/*
 * Mark values in a dense matrix for garbage collection. This may not be necessary -- further testing required.
 */
void nm_dense_storage_mark(STORAGE* storage_base) {

  DENSE_STORAGE* storage = (DENSE_STORAGE*)storage_base;

  if (storage && storage->dtype == nm::RUBYOBJ) {
    VALUE* els = reinterpret_cast<VALUE*>(storage->elements);

    if (els) {
      rb_gc_mark_locations(els, &(els[nm_storage_count_max_elements(storage)-1]));
    }
    //for (size_t index = nm_storage_count_max_elements(storage); index-- > 0;) {
    //  rb_gc_mark(els[index]);
    //}
  }
}

/**
 * Register a dense storage struct as in-use to avoid garbage collection of the
 * elements stored.
 *
 * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
 *
 */
void nm_dense_storage_register(const STORAGE* s) {
  const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
  if (storage->dtype == nm::RUBYOBJ && storage->elements) {
    nm_register_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
  }
}

/**
 * Unregister a dense storage struct to allow normal garbage collection of the
 * elements stored.
 *
 * This function will check dtype and ignore non-object dtype, so its safe to pass any dense storage in.
 *
 */
void nm_dense_storage_unregister(const STORAGE* s) {
  const DENSE_STORAGE* storage = reinterpret_cast<const DENSE_STORAGE*>(s);
  if (storage->dtype == nm::RUBYOBJ && storage->elements) {
    nm_unregister_values(reinterpret_cast<VALUE*>(storage->elements), nm_storage_count_max_elements(storage));
  }
}

///////////////
// Accessors //
///////////////



/*
 * map_pair iterator for dense matrices (for element-wise operations)
 */
VALUE nm_dense_map_pair(VALUE self, VALUE right) {

  NM_CONSERVATIVE(nm_register_value(&self));
  NM_CONSERVATIVE(nm_register_value(&right));

  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&right));
  NM_CONSERVATIVE(nm_unregister_value(&self));
  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);

  DENSE_STORAGE *s = NM_STORAGE_DENSE(self),
                *t = NM_STORAGE_DENSE(right);

  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
  memset(coords, 0, sizeof(size_t) * s->dim);

  size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  size_t count = nm_storage_count_max_elements(s);

  DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);

  VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);
  nm_dense_storage_register(result);

  for (size_t k = 0; k < count; ++k) {
    nm_dense_storage_coords(result, k, coords);
    size_t s_index = nm_dense_storage_pos(s, coords),
           t_index = nm_dense_storage_pos(t, coords);

    VALUE sval = NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval;
    nm_register_value(&sval);
    VALUE tval = NM_DTYPE(right) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(t->elements)[t_index] : nm::rubyobj_from_cval((char*)(t->elements) + t_index*DTYPE_SIZES[NM_DTYPE(right)], NM_DTYPE(right)).rval;
    result_elem[k] = rb_yield_values(2, sval, tval);
    nm_unregister_value(&sval);
  }

  VALUE klass = CLASS_OF(self);
  NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
  nm_register_nmatrix(m);
  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);

  nm_unregister_nmatrix(m);
  nm_dense_storage_unregister(result);
  NM_CONSERVATIVE(nm_unregister_value(&self));
  NM_CONSERVATIVE(nm_unregister_value(&right));

  return to_return;

}

/*
 * map enumerator for dense matrices.
 */
VALUE nm_dense_map(VALUE self) {

  NM_CONSERVATIVE(nm_register_value(&self));

  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&self));
  RETURN_SIZED_ENUMERATOR(self, 0, 0, nm_enumerator_length);

  DENSE_STORAGE *s = NM_STORAGE_DENSE(self);

  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
  memset(coords, 0, sizeof(size_t) * s->dim);

  size_t *shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  size_t count = nm_storage_count_max_elements(s);

  DENSE_STORAGE* result = nm_dense_storage_create(nm::RUBYOBJ, shape_copy, s->dim, NULL, 0);

  VALUE* result_elem = reinterpret_cast<VALUE*>(result->elements);

  nm_dense_storage_register(result);

  for (size_t k = 0; k < count; ++k) {
    nm_dense_storage_coords(result, k, coords);
    size_t s_index = nm_dense_storage_pos(s, coords);

    result_elem[k] = rb_yield(NM_DTYPE(self) == nm::RUBYOBJ ? reinterpret_cast<VALUE*>(s->elements)[s_index] : nm::rubyobj_from_cval((char*)(s->elements) + s_index*DTYPE_SIZES[NM_DTYPE(self)], NM_DTYPE(self)).rval);
  }

  VALUE klass = CLASS_OF(self);

  NMATRIX* m = nm_create(nm::DENSE_STORE, reinterpret_cast<STORAGE*>(result));
  nm_register_nmatrix(m);

  VALUE to_return = Data_Wrap_Struct(klass, nm_mark, nm_delete, m);

  nm_unregister_nmatrix(m);
  nm_dense_storage_unregister(result);
  NM_CONSERVATIVE(nm_unregister_value(&self));

  return to_return;
}


/*
 * each_with_indices iterator for dense matrices.
 */
VALUE nm_dense_each_with_indices(VALUE nmatrix) {

  NM_CONSERVATIVE(nm_register_value(&nmatrix));
  
  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length); // fourth argument only used by Ruby2+
  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);

  // Create indices and initialize them to zero
  size_t* coords = NM_ALLOCA_N(size_t, s->dim);
  memset(coords, 0, sizeof(size_t) * s->dim);

  size_t slice_index;
  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);

  DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);

  for (size_t k = 0; k < nm_storage_count_max_elements(s); ++k) {
    nm_dense_storage_coords(sliced_dummy, k, coords);
    slice_index = nm_dense_storage_pos(s, coords);
    VALUE ary = rb_ary_new();
    nm_register_value(&ary);
    if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) rb_ary_push(ary, reinterpret_cast<VALUE*>(s->elements)[slice_index]);
    else rb_ary_push(ary, nm::rubyobj_from_cval((char*)(s->elements) + slice_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval);

    for (size_t p = 0; p < s->dim; ++p) {
      rb_ary_push(ary, INT2FIX(coords[p]));
    }

    // yield the array which now consists of the value and the indices
    rb_yield(ary);
    nm_unregister_value(&ary);
  }

  nm_dense_storage_delete(sliced_dummy);

  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));

  return nmatrix;

}


/*
 * Borrowed this function from NArray. Handles 'each' iteration on a dense
 * matrix.
 *
 * Additionally, handles separately matrices containing VALUEs and matrices
 * containing other types of data.
 */
VALUE nm_dense_each(VALUE nmatrix) {

  NM_CONSERVATIVE(nm_register_value(&nmatrix));

  RETURN_SIZED_ENUMERATOR_PRE
  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));
  RETURN_SIZED_ENUMERATOR(nmatrix, 0, 0, nm_enumerator_length);

  DENSE_STORAGE* s = NM_STORAGE_DENSE(nmatrix);

  size_t* temp_coords = NM_ALLOCA_N(size_t, s->dim);
  size_t sliced_index;
  size_t* shape_copy = NM_ALLOC_N(size_t, s->dim);
  memcpy(shape_copy, s->shape, sizeof(size_t) * s->dim);
  DENSE_STORAGE* sliced_dummy = nm_dense_storage_create_dummy(s->dtype, shape_copy, s->dim);

  if (NM_DTYPE(nmatrix) == nm::RUBYOBJ) {

    // matrix of Ruby objects -- yield those objects directly
    for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
      nm_dense_storage_coords(sliced_dummy, i, temp_coords);
      sliced_index = nm_dense_storage_pos(s, temp_coords);
      rb_yield( reinterpret_cast<VALUE*>(s->elements)[sliced_index] );
    }

  } else {

    // We're going to copy the matrix element into a Ruby VALUE and then operate on it. This way user can't accidentally
    // modify it and cause a seg fault.
    for (size_t i = 0; i < nm_storage_count_max_elements(s); ++i) {
      nm_dense_storage_coords(sliced_dummy, i, temp_coords);
      sliced_index = nm_dense_storage_pos(s, temp_coords);
      VALUE v = nm::rubyobj_from_cval((char*)(s->elements) + sliced_index*DTYPE_SIZES[NM_DTYPE(nmatrix)], NM_DTYPE(nmatrix)).rval;
      rb_yield( v ); // yield to the copy we made
    }
  }

  nm_dense_storage_delete(sliced_dummy);
  NM_CONSERVATIVE(nm_unregister_value(&nmatrix));

  return nmatrix;

}


/*
 * Non-templated version of nm::dense_storage::slice_copy
 */
static void slice_copy(DENSE_STORAGE *dest, const DENSE_STORAGE *src, size_t* lengths, size_t pdest, size_t psrc, size_t n) {
  NAMED_LR_DTYPE_TEMPLATE_TABLE(slice_copy_table, nm::dense_storage::slice_copy, void, DENSE_STORAGE*, const DENSE_STORAGE*, size_t*, size_t, size_t, size_t)

  slice_copy_table[dest->dtype][src->dtype](dest, src, lengths, pdest, psrc, n);
}


/*
 * Get a slice or one element, using copying.
 *
 * FIXME: Template the first condition.
 */
void* nm_dense_storage_get(const STORAGE* storage, SLICE* slice) {
  DENSE_STORAGE* s = (DENSE_STORAGE*)storage;
  if (slice->single)
    return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];
  else {
    nm_dense_storage_register(s);
    size_t *shape      = NM_ALLOC_N(size_t, s->dim);
    for (size_t i = 0; i < s->dim; ++i) {
      shape[i]  = slice->lengths[i];
    }

    DENSE_STORAGE* ns = nm_dense_storage_create(s->dtype, shape, s->dim, NULL, 0);

    slice_copy(ns,
        reinterpret_cast<const DENSE_STORAGE*>(s->src),
        slice->lengths,
        0,
        nm_dense_storage_pos(s, slice->coords),
        0);

    nm_dense_storage_unregister(s);
    return ns;
  }
}

/*
 * Get a slice or one element by reference (no copy).
 *
 * FIXME: Template the first condition.
 */
void* nm_dense_storage_ref(const STORAGE* storage, SLICE* slice) {
  DENSE_STORAGE* s = (DENSE_STORAGE*)storage;

  if (slice->single)
    return (char*)(s->elements) + nm_dense_storage_pos(s, slice->coords) * DTYPE_SIZES[s->dtype];

  else {
    nm_dense_storage_register(s);
    DENSE_STORAGE* ns = NM_ALLOC( DENSE_STORAGE );
    ns->dim        = s->dim;
    ns->dtype      = s->dtype;
    ns->offset     = NM_ALLOC_N(size_t, ns->dim);
    ns->shape      = NM_ALLOC_N(size_t, ns->dim);

    for (size_t i = 0; i < ns->dim; ++i) {
      ns->offset[i] = slice->coords[i] + s->offset[i];
      ns->shape[i]  = slice->lengths[i];
    }

    ns->stride     = s->stride;
    ns->elements   = s->elements;

    s->src->count++;
    ns->src = s->src;

    nm_dense_storage_unregister(s);
    return ns;
  }
}




/*
 * Set a value or values in a dense matrix. Requires that right be either a single value or an NMatrix (ref or real).
 */
void nm_dense_storage_set(VALUE left, SLICE* slice, VALUE right) {
  NAMED_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::set, void, VALUE, SLICE*, VALUE)
  nm::dtype_t dtype = NM_DTYPE(left);
  ttable[dtype](left, slice, right);
}


///////////
// Tests //
///////////

/*
 * Do these two dense matrices have the same contents?
 *
 * TODO: Test the shape of the two matrices.
 * TODO: See if using memcmp is faster when the left- and right-hand matrices
 *        have the same dtype.
 */
bool nm_dense_storage_eqeq(const STORAGE* left, const STORAGE* right) {
  LR_DTYPE_TEMPLATE_TABLE(nm::dense_storage::eqeq, bool, const DENSE_STORAGE*, const DENSE_STORAGE*)

  if (!ttable[left->dtype][right->dtype]) {
    rb_raise(nm_eDataTypeError, "comparison between these dtypes is undefined");
    return false;
  }

  return ttable[left->dtype][right->dtype]((const DENSE_STORAGE*)left, (const DENSE_STORAGE*)right);
}

/*
 * Test to see if the matrix is Hermitian.  If the matrix does not have a
 * dtype of Complex64 or Complex128 this is the same as testing for symmetry.
 */
bool nm_dense_storage_is_hermitian(const DENSE_STORAGE* mat, int lda) {
  if (mat->dtype == nm::COMPLEX64) {
    return nm::dense_storage::is_hermitian<nm::Complex64>(mat, lda);

  } else if (mat->dtype == nm::COMPLEX128) {
    return nm::dense_storage::is_hermitian<nm::Complex128>(mat, lda);

  } else {
    return nm_dense_storage_is_symmetric(mat, lda);
  }
}

/*
 * Is this dense matrix symmetric about the diagonal?
 */
bool nm_dense_storage_is_symmetric(const DENSE_STORAGE* mat, int lda) {
  DTYPE_TEMPLATE_TABLE(nm::dense_storage::is_symmetric, bool, const DENSE_STORAGE*, int);

  return ttable[mat->dtype](mat, lda);
}

//////////
// Math //
//////////


/*
 * Dense matrix-matrix multiplication.
 */
STORAGE* nm_dense_storage_matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
  DTYPE_TEMPLATE_TABLE(nm::dense_storage::matrix_multiply, DENSE_STORAGE*, const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector);

  return ttable[casted_storage.left->dtype](casted_storage, resulting_shape, vector);
}

/////////////
// Utility //
/////////////

/*
 * Determine the linear array position (in elements of s) of some set of coordinates
 * (given by slice).
 */
size_t nm_dense_storage_pos(const DENSE_STORAGE* s, const size_t* coords) {
  size_t pos = 0;

  for (size_t i = 0; i < s->dim; ++i)
    pos += (coords[i] + s->offset[i]) * s->stride[i];

  return pos;

}

/*
 * Determine the a set of slice coordinates from linear array position (in elements
 * of s) of some set of coordinates (given by slice).  (Inverse of
 * nm_dense_storage_pos).
 *
 * The parameter coords_out should be a pre-allocated array of size equal to s->dim.
 */
void nm_dense_storage_coords(const DENSE_STORAGE* s, const size_t slice_pos, size_t* coords_out) {

  size_t temp_pos = slice_pos;

  for (size_t i = 0; i < s->dim; ++i) {
    coords_out[i] = (temp_pos - temp_pos % s->stride[i])/s->stride[i] - s->offset[i];
    temp_pos = temp_pos % s->stride[i];
  }
}

/*
 * Calculate the stride length.
 */
static size_t* stride(size_t* shape, size_t dim) {
  size_t i, j;
  size_t* stride = NM_ALLOC_N(size_t, dim);

  for (i = 0; i < dim; ++i) {
    stride[i] = 1;
    for (j = i+1; j < dim; ++j) {
      stride[i] *= shape[j];
    }
  }

  return stride;
}


/////////////////////////
// Copying and Casting //
/////////////////////////

/*
 * Copy dense storage, changing dtype if necessary.
 */
STORAGE* nm_dense_storage_cast_copy(const STORAGE* rhs, nm::dtype_t new_dtype, void* dummy) {
  NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::cast_copy, DENSE_STORAGE*, const DENSE_STORAGE* rhs, nm::dtype_t new_dtype);

  if (!ttable[new_dtype][rhs->dtype]) {
    rb_raise(nm_eDataTypeError, "cast between these dtypes is undefined");
    return NULL;
  }

  return (STORAGE*)ttable[new_dtype][rhs->dtype]((DENSE_STORAGE*)rhs, new_dtype);
}

/*
 * Copy dense storage without a change in dtype.
 */
DENSE_STORAGE* nm_dense_storage_copy(const DENSE_STORAGE* rhs) {
  nm_dense_storage_register(rhs);

  size_t  count = 0;
  size_t *shape  = NM_ALLOC_N(size_t, rhs->dim);

  // copy shape and offset
  for (size_t i = 0; i < rhs->dim; ++i) {
    shape[i]  = rhs->shape[i];
  }

  DENSE_STORAGE* lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);
  count = nm_storage_count_max_elements(lhs);


  // Ensure that allocation worked before copying.
  if (lhs && count) {
    if (rhs == rhs->src) // not a reference
      memcpy(lhs->elements, rhs->elements, DTYPE_SIZES[rhs->dtype] * count);
    else { // slice whole matrix
      nm_dense_storage_register(lhs);
      size_t *offset = NM_ALLOC_N(size_t, rhs->dim);
      memset(offset, 0, sizeof(size_t) * rhs->dim);

      slice_copy(lhs,
           reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
           rhs->shape,
           0,
           nm_dense_storage_pos(rhs, offset),
           0);

      nm_dense_storage_unregister(lhs);
    }
  }

  nm_dense_storage_unregister(rhs);

  return lhs;
}


/*
 * Transpose dense storage into a new dense storage object. Basically a copy constructor.
 *
 * Not much point in templating this as it's pretty straight-forward.
 */
STORAGE* nm_dense_storage_copy_transposed(const STORAGE* rhs_base) {
  DENSE_STORAGE* rhs = (DENSE_STORAGE*)rhs_base;

  nm_dense_storage_register(rhs);
  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);

  // swap shape
  shape[0] = rhs->shape[1];
  shape[1] = rhs->shape[0];

  DENSE_STORAGE *lhs = nm_dense_storage_create(rhs->dtype, shape, rhs->dim, NULL, 0);

  nm_dense_storage_register(lhs);

  if (rhs_base->src == rhs_base) {
    nm_math_transpose_generic(rhs->shape[0], rhs->shape[1], rhs->elements, rhs->shape[1], lhs->elements, lhs->shape[1], DTYPE_SIZES[rhs->dtype]);
  } else {
    NAMED_LR_DTYPE_TEMPLATE_TABLE(ttable, nm::dense_storage::ref_slice_copy_transposed, void, const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs);

    if (!ttable[lhs->dtype][rhs->dtype]) {
      nm_dense_storage_unregister(rhs);
      nm_dense_storage_unregister(lhs);      
      rb_raise(nm_eDataTypeError, "transposition between these dtypes is undefined");
    }

    ttable[lhs->dtype][rhs->dtype](rhs, lhs);
  }

  nm_dense_storage_unregister(rhs);
  nm_dense_storage_unregister(lhs);

  return (STORAGE*)lhs;
}

} // end of extern "C" block

namespace nm {

/*
 * Used for slice setting. Takes the right-hand of the equal sign, a single VALUE, and massages
 * it into the correct form if it's not already there (dtype, non-ref, dense). Returns a pair of the NMATRIX* and a
 * boolean. If the boolean is true, the calling function is responsible for calling nm_delete on the NMATRIX*.
 * Otherwise, the NMATRIX* still belongs to Ruby and Ruby will free it.
 */
std::pair<NMATRIX*,bool> interpret_arg_as_dense_nmatrix(VALUE right, nm::dtype_t dtype) {
  NM_CONSERVATIVE(nm_register_value(&right));
  if (IsNMatrixType(right)) {
    NMATRIX *r;
    if (NM_STYPE(right) != DENSE_STORE || NM_DTYPE(right) != dtype || NM_SRC(right) != NM_STORAGE(right)) {
      UnwrapNMatrix( right, r );
      NMATRIX* ldtype_r = nm_cast_with_ctype_args(r, nm::DENSE_STORE, dtype, NULL);
      NM_CONSERVATIVE(nm_unregister_value(&right));
      return std::make_pair(ldtype_r,true);
    } else {  // simple case -- right-hand matrix is dense and is not a reference and has same dtype
      UnwrapNMatrix( right, r );
      NM_CONSERVATIVE(nm_unregister_value(&right));
      return std::make_pair(r, false);
    }
    // Do not set v_alloc = true for either of these. It is the responsibility of r/ldtype_r
  } else if (RB_TYPE_P(right, T_DATA)) {
    NM_CONSERVATIVE(nm_unregister_value(&right));
    rb_raise(rb_eTypeError, "unrecognized type for slice assignment");
  }

  NM_CONSERVATIVE(nm_unregister_value(&right));
  return std::make_pair<NMATRIX*,bool>(NULL, false);
}


namespace dense_storage {

/////////////////////////
// Templated Functions //
/////////////////////////

template<typename LDType, typename RDType>
void ref_slice_copy_transposed(const DENSE_STORAGE* rhs, DENSE_STORAGE* lhs) {

  nm_dense_storage_register(rhs);
  nm_dense_storage_register(lhs);

  LDType* lhs_els = reinterpret_cast<LDType*>(lhs->elements);
  RDType* rhs_els = reinterpret_cast<RDType*>(rhs->elements);
  size_t count = nm_storage_count_max_elements(lhs);;
  size_t* temp_coords = NM_ALLOCA_N(size_t, lhs->dim);
  size_t coord_swap_temp;

  while (count-- > 0) {
    nm_dense_storage_coords(lhs, count, temp_coords);
    NM_SWAP(temp_coords[0], temp_coords[1], coord_swap_temp);
    size_t r_coord = nm_dense_storage_pos(rhs, temp_coords);
    lhs_els[count] = rhs_els[r_coord];
  }

  nm_dense_storage_unregister(rhs);
  nm_dense_storage_unregister(lhs);

}

template <typename LDType, typename RDType>
DENSE_STORAGE* cast_copy(const DENSE_STORAGE* rhs, dtype_t new_dtype) {
  nm_dense_storage_register(rhs);

  size_t  count = nm_storage_count_max_elements(rhs);

  size_t *shape = NM_ALLOC_N(size_t, rhs->dim);
  memcpy(shape, rhs->shape, sizeof(size_t) * rhs->dim);

  DENSE_STORAGE* lhs = nm_dense_storage_create(new_dtype, shape, rhs->dim, NULL, 0);

  nm_dense_storage_register(lhs);

  // Ensure that allocation worked before copying.
  if (lhs && count) {
    if (rhs->src != rhs) { // Make a copy of a ref to a matrix.
      size_t* offset      = NM_ALLOCA_N(size_t, rhs->dim);
      memset(offset, 0, sizeof(size_t) * rhs->dim);

      slice_copy(lhs, reinterpret_cast<const DENSE_STORAGE*>(rhs->src),
                 rhs->shape, 0,
                 nm_dense_storage_pos(rhs, offset), 0);

    } else {              // Make a regular copy.
      RDType* rhs_els          = reinterpret_cast<RDType*>(rhs->elements);
      LDType* lhs_els          = reinterpret_cast<LDType*>(lhs->elements);

      for (size_t i = 0; i < count; ++i)
        lhs_els[i] = rhs_els[i];
    }
  }

  nm_dense_storage_unregister(rhs);
  nm_dense_storage_unregister(lhs);

  return lhs;
}

template <typename LDType, typename RDType>
bool eqeq(const DENSE_STORAGE* left, const DENSE_STORAGE* right) {
  nm_dense_storage_register(left);
  nm_dense_storage_register(right);

  size_t index;
  DENSE_STORAGE *tmp1, *tmp2;
  tmp1 = NULL; tmp2 = NULL;
  bool result = true;
  /* FIXME: Very strange behavior! The GC calls the method directly with non-initialized data. */

  LDType* left_elements    = (LDType*)left->elements;
  RDType* right_elements  = (RDType*)right->elements;

  // Copy elements in temp matrix if you have reference to the right.
  if (left->src != left) {
    tmp1 = nm_dense_storage_copy(left);
    nm_dense_storage_register(tmp1);
    left_elements = (LDType*)tmp1->elements;
  }
  if (right->src != right) {
    tmp2 = nm_dense_storage_copy(right);
    nm_dense_storage_register(tmp2);
    right_elements = (RDType*)tmp2->elements;
  }



  for (index = nm_storage_count_max_elements(left); index-- > 0;) {
    if (left_elements[index] != right_elements[index]) {
      result = false;
      break;
    }
  }

  if (tmp1) {
    nm_dense_storage_unregister(tmp1);
    NM_FREE(tmp1);
  }
  if (tmp2) {
    nm_dense_storage_unregister(tmp2);
    NM_FREE(tmp2);
  }

  nm_dense_storage_unregister(left);
  nm_dense_storage_unregister(right);
  return result;
}

template <typename DType>
bool is_hermitian(const DENSE_STORAGE* mat, int lda) {
  unsigned int i, j;
  DType complex_conj;

  const DType* els = (DType*) mat->elements;

  for (i = mat->shape[0]; i-- > 0;) {
    for (j = i + 1; j < mat->shape[1]; ++j) {
      complex_conj    = els[j*lda + i];
      complex_conj.i  = -complex_conj.i;

      if (els[i*lda+j] != complex_conj) {
        return false;
      }
    }
  }

  return true;
}

template <typename DType>
bool is_symmetric(const DENSE_STORAGE* mat, int lda) {
  unsigned int i, j;
  const DType* els = (DType*) mat->elements;

  for (i = mat->shape[0]; i-- > 0;) {
    for (j = i + 1; j < mat->shape[1]; ++j) {
      if (els[i*lda+j] != els[j*lda+i]) {
        return false;
      }
    }
  }

  return true;
}



/*
 * DType-templated matrix-matrix multiplication for dense storage.
 */
template <typename DType>
static DENSE_STORAGE* matrix_multiply(const STORAGE_PAIR& casted_storage, size_t* resulting_shape, bool vector) {
  DENSE_STORAGE *left  = (DENSE_STORAGE*)(casted_storage.left),
                *right = (DENSE_STORAGE*)(casted_storage.right);

  nm_dense_storage_register(left);
  nm_dense_storage_register(right);

  // Create result storage.
  DENSE_STORAGE* result = nm_dense_storage_create(left->dtype, resulting_shape, 2, NULL, 0);

  nm_dense_storage_register(result);

  DType *pAlpha = NM_ALLOCA_N(DType, 1),
        *pBeta  = NM_ALLOCA_N(DType, 1);

  *pAlpha = 1;
  *pBeta = 0;
  // Do the multiplication
  if (vector) nm::math::gemv<DType>(CblasNoTrans, left->shape[0], left->shape[1], pAlpha,
                                    reinterpret_cast<DType*>(left->elements), left->shape[1],
                                    reinterpret_cast<DType*>(right->elements), 1, pBeta,
                                    reinterpret_cast<DType*>(result->elements), 1);
  else        nm::math::gemm<DType>(CblasRowMajor, CblasNoTrans, CblasNoTrans, left->shape[0], right->shape[1], left->shape[1],
                                    pAlpha, reinterpret_cast<DType*>(left->elements), left->shape[1],
                                    reinterpret_cast<DType*>(right->elements), right->shape[1], pBeta,
                                    reinterpret_cast<DType*>(result->elements), result->shape[1]);


  nm_dense_storage_unregister(left);
  nm_dense_storage_unregister(right);
  nm_dense_storage_unregister(result);

  return result;
}

}} // end of namespace nm::dense_storage