princelab/mspire

View on GitHub
lib/mspire/mzml/data_array.rb

Summary

Maintainability
B
6 hrs
Test Coverage
require 'base64'
require 'zlib'
require 'mspire/paramable'

module Mspire
  class Mzml
  end
end


# Mspire::Mzml::DataArray's are currently implemented as a standard Ruby
# array.  Data may be input or output with less precision, but a standard data
# array will be accessible from ruby as Float (float64).  Thus, the params
# merely alter what will be output to xml, so, to alter what is written with
# to_xml, change the params.  If no params are changed in the data array it
# will be written with the same precision as it was read in with.
class Mspire::Mzml::DataArray < Array 
  alias_method :array_init, :initialize
  include Mspire::Paramable

  DEFAULT_DTYPE_ACC = 'MS:1000523' # float64
  DEFAULT_COMPRESSION_ACC = 'MS:1000574'
  COMPRESSION_ACC = 'MS:1000574'
  NO_COMPRESSION_ACC = 'MS:1000576'
  ACC_TO_DTYPE = {
    'MS:1000523' => :float64,
    'MS:1000521' => :float32,
    #'MS:1000520' => :float16,  # <- not supported w/o other gems
    'MS:1000522' => :int64, # signed
    'MS:1000519' => :int32, # signed
  }
  ACC_TO_UNPACK_CODE = {
    'MS:1000523' => 'E*',
    'MS:1000521' => 'e*',
    #'MS:1000520' => :float16,  # <- not supported w/o other gems
    'MS:1000522' => 'q<*',
    'MS:1000519' => 'l<*',
  }

  DTYPE_ACCS = ACC_TO_DTYPE.keys

  # unless data type (see DTYPE_TO_ACC) or TYPE
  def initialize(*args)
    params_init # paramable
    array_init(*args)
  end

  # (optional) the DataProcessing object associated with this DataArray
  attr_accessor :data_processing

  # set this if the data is written to an external file (such as the ibd
  # file for imzML files)
  attr_accessor :external

  # returns a DataArray object. Analogous to [] for creating an array.
  def self.[](*data)
    self.new(data)
  end

  # returns an array of DataArray objects based on the given arrays
  def self.from_arrays(arrays)
    arrays.map {|ar| self.new(ar) }
  end

    # returns an array of DataArray objects (2)
  def self.empty_data_arrays(num=2)
    Array.new(num) { self.new }
  end

  def self.data_arrays_from_xml(xml, link)
    data_arrays = xml.children.map do |binary_data_array_n|
      Mspire::Mzml::DataArray.from_xml(binary_data_array_n, link)
    end
    (data_arrays.size > 0) ? data_arrays : empty_data_arrays
  end

  def self.from_xml(xml, link)
    da = self.new 
    binary_n = da.describe_from_xml!(xml, link[:ref_hash])

    if (dp_id = xml[:dataProcessingRef])
      da.data_processing = link[:data_processing_hash][dp_id]
    end

    zlib_compression = nil
    precision_unpack = nil
    # could also implement with set or hash lookup (need to test for
    # speed)
    da.each_accessionable_param do |param|
      acc = param.accession
      unless zlib_compression || zlib_compression == false
        case acc
        when 'MS:1000574' then zlib_compression = true
        when 'MS:1000576' then zlib_compression = false
        end
      end
      unless precision_unpack
        case acc
        when 'MS:1000523' then precision_unpack = 'E*'
        when 'MS:1000521' then precision_unpack = 'e*'
        end
      end
    end

    data = binary_n.text.unpack("m*").first

    # some implementations leave data blank if there aren't peaks
    # even if they say it is zlib compressed...
    unzipped = 
      if data.size > 0 then ( zlib_compression ? Zlib::Inflate.inflate(data) : data )
      else data end
    da.replace( unzipped.unpack(precision_unpack) )
    da
  end

  # creates a base64 binary string based on the objects params.  If no dtype
  # or compression are specified, then it will be set (i.e., params added to
  # the object).
  def to_binary
    # single pass over params for speed
    pack_code = nil
    compression = nil
    each_accessionable_param do |param|
      acc = param.accession
      if !pack_code && (code=ACC_TO_UNPACK_CODE[acc])
        pack_code = code
      end
      if compression.nil?
        compression = 
          case acc
          when COMPRESSION_ACC then true
          when NO_COMPRESSION_ACC then false
          end
      end
    end
    # can speed these up:
    unless pack_code
      describe! DEFAULT_DTYPE_ACC
      pack_code = ACC_TO_UNPACK_CODE[DEFAULT_DTYPE_ACC]
    end
    if compression.nil?
      describe! DEFAULT_COMPRESSION_ACC
      compression = 
        case DEFAULT_COMPRESSION_ACC
        when COMPRESSION_ACC then true
        when NO_COMPRESSION_ACC then false
        end
    end

    # TODO: support faster pack method with nmatrix or narray
    string = self.pack(pack_code) 
    string = Zlib::Deflate.deflate(string) if compression
    Base64.strict_encode64(string)
  end

  # will set the data type to DEFAULT_DTYPE and compression if n
  def to_xml(builder)
    encoded_length = 
      if @external
        0
      else
        base64 = to_binary
        base64.bytesize
      end

    builder.binaryDataArray(encodedLength: encoded_length) do |bda_n|
      super(bda_n)
      bda_n.binary(base64) unless self.external
    end
  end

  # takes an array of DataArray objects or other kinds of objects
  def self.list_xml(arrays, builder)
    builder.binaryDataArrayList(count: arrays.size) do |bdal_n|
      arrays.each do |ar|
        ar.to_xml(bdal_n)
      end
    end
  end
end