lib/mspire/mzml.rb
require 'stringio'
require 'mspire'
require 'builder'
require 'core_ext/enumerable'
require 'mspire/mzml/reader'
require 'mspire/mzml/scan_settings'
module Mspire
# Reading an mzml file:
#
# Mspire::Mzml.open("somefile.mzML") do |mzml|
# mzml.each do |spectrum|
# scan = spectrum.scan
# spectrum.mzs # array of m/zs
# spectrum.intensities # array of intensities
# spectrum.peaks do |mz,intensity|
# puts "mz: #{mz} intensity: #{intensity}"
# end
#
# spectrum.params # list all the params associated with an object
#
# # true if key exists and no value, the value if present, or false
# if spectrum.fetch_by_acc('MS:1000128')
# puts "this is a profile spectrum!"
# end
#
# if spectrum.ms_level == 2
# low_mz = spectrum.scan_list.first.scan_windows.first.to_i
# puts "begin scan at #{low_mz} m/z"
# end
# end
#
# mzml.each_chromatogram do |chrm|
# chrm.times
# chrm.intensities
# end
# end
#
# Note that the mzml object supports random spectrum access (even if the
# mzml was not indexed):
#
# mzml[22] # retrieve spectrum at index 22
#
# Writing an mzml file from scratch:
#
# spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
# # profile and ms_level 1
# spec.describe_many!(['MS:1000128', ['MS:1000511', 1]])
# spec.data_arrays = [
# Mspire::Mzml::DataArray[1,2,3].describe!('MS:1000514'),
# Mspire::Mzml::DataArray[4,5,6].describe!('MS:1000515')
# ]
# spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
# scan = Mspire::Mzml::Scan.new do |scan|
# # retention time of 40 seconds
# scan.describe! 'MS:1000016', 40.0, 'UO:0000010'
# end
# sl << scan
# end
# end
#
# spec2 = Mspire::Mzml::Spectrum.new('scan=2') do |spec|
# # centroid, ms_level 2, MSn spectrum,
# spec.describe_many!(['MS:1000127', ['MS:1000511', 2], "MS:1000580"])
# spec.data_arrays = [
# Mspire::Mzml::DataArray[1,2,3.5].describe!('MS:1000514'),
# Mspire::Mzml::DataArray[5,6,5].describe!('MS:1000515')
# ]
# spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
# scan = Mspire::Mzml::Scan.new do |scan|
# # retention time of 45 seconds
# scan.describe! 'MS:1000016', 45.0, 'UO:0000010'
# end
# sl << scan
# end
# precursor = Mspire::Mzml::Precursor.new( spec1.id )
# si = Mspire::Mzml::SelectedIon.new
# # the selected ion m/z:
# si.describe! "MS:1000744", 2.0
# # the selected ion charge state
# si.describe! "MS:1000041", 2
# # the selected ion intensity
# si.describe! "MS:1000042", 5
# precursor.selected_ions = [si]
# spec.precursors = [precursor]
# end
#
# mzml = Mspire::Mzml.new do |mzml|
# mzml.id = 'ms1_and_ms2'
# mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
# mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
# fd.file_content = Mspire::Mzml::FileContent.new
# fd.source_files << Mspire::Mzml::SourceFile.new
# end
# default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
# mzml.instrument_configurations << default_instrument_config
# software = Mspire::Mzml::Software.new
# mzml.software_list << software
# default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
# mzml.data_processing_list << default_data_processing
# mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
# spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing, [spec1, spec2])
# run.spectrum_list = spectrum_list
# end
# end
#
# mzml.write("writtenxml.mzML")
class Mzml
include Enumerable # each_spectrum
class << self
# read-only right now
def open(filename, &block)
File.open(filename) do |io|
block.call(self.new(io))
end
end
def foreach(filename, &block)
return to_enum(__method__, filename) unless block
open(filename) do |mzml|
mzml.each(&block)
end
end
end
module Default
NAMESPACE = {
:xmlns => "http://psi.hupo.org/ms/mzml",
"xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
"xmlns:xsd" => "http://www.w3.org/2001/XMLSchema",
"xsi:schemaLocation" => "http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd"
}
VERSION = '1.1.0'
end
###############################################
# ATTRIBUTES
###############################################
# (optional) an id for accessing from external files
attr_accessor :id
# (required) the Mzml document version
attr_accessor :version
# (optional) e.g. a PRIDE accession number
attr_accessor :accession
###############################################
# SUBELEMENTS
###############################################
# (required) an array of Mspire::Mzml::CV objects
attr_accessor :cvs
# (required) an Mspire::Mzml::FileDescription
attr_accessor :file_description
# (optional) an array of CV::ReferenceableParamGroup objects
attr_accessor :referenceable_param_groups
# (optional) an array of Mspire::Mzml::Sample objects
attr_accessor :samples
# (required) an array of Mspire::Mzml::Software objects
attr_accessor :software_list
# (optional) an array of Mspire::Mzml::ScanSettings objects
attr_accessor :scan_settings_list
# (required) an array of Mspire::Mzml::InstrumentConfiguration objects
attr_accessor :instrument_configurations
# (required) an array of Mspire::Mzml::DataProcessing objects
attr_accessor :data_processing_list
# (required) an Mspire::Mzml::Run object
attr_accessor :run
# the io object of the mzml file
attr_accessor :io
# Mspire::Mzml::IndexList object associated with the file (only expected when reading
# mzml files at the moment)
attr_accessor :index_list
# xml file encoding
attr_accessor :encoding
# arg must be an IO object for automatic index and header parsing to
# occur. If arg is a hash, then attributes are set. In addition (or
# alternatively) a block called that yields self to setup the object.
#
# io must respond_to?(:size), giving the size of the io object in bytes
# which allows seeking. get_index_list is called to get or create the
# index list.
def initialize(arg=nil, &block)
%w(cvs software_list instrument_configurations samples data_processing_list).each {|guy| self.send( guy + '=', [] ) }
case arg
when IO
set_from_xml_io!(arg)
when Hash
arg.each {|k,v| self.send("#{k}=", v) }
end
block.call(self) if block
end
module Convenience
def each_chromatogram(&block)
@run.chromatogram_list.each(&block)
end
def each_spectrum(&block)
@run.spectrum_list.each(&block)
end
alias_method :each, :each_spectrum
# @param [Object] arg an index number (Integer) or id string (String)
# @return [Mspire::Mzml::Spectrum] a spectrum object
def spectrum(arg)
run.spectrum_list[arg]
end
alias_method :'[]', :spectrum
# @param [Object] arg an index number (Integer) or id string (String)
# @return [Mspire::Mzml::Chromatogram] a spectrum object
def chromatogram(arg)
run.chromatogram_list[arg]
end
def num_chromatograms
run.chromatogram_list.size
end
# returns the number of spectra
def length
run.spectrum_list.size
end
alias_method :size, :length
# @param [Integer] scan_num the scan number
# @return [Mspire::Spectrum] a spectrum object, or nil if not found
# @raise [ScanNumbersNotUnique] if scan numbers are not unique
# @raise [ScanNumbersNotFound] if spectra exist but scan numbers were not
# found
def spectrum_from_scan_num(scan_num)
@scan_to_index ||= @index_list[0].create_scan_to_index
raise ScanNumbersNotUnique if @scan_to_index == false
raise ScanNumbersNotFound if @scan_to_index == nil
spectrum(@scan_to_index[scan_num])
end
end
include Convenience
# Because mzml files are often very large, we try to avoid storing the
# entire object tree in memory before writing.
#
# takes a filename and uses builder to write to it
# if no filename is given, returns a string
def to_xml(filename=nil)
# TODO: support indexed mzml files
io = filename ? File.open(filename, 'w') : StringIO.new
xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
xml.instruct!
mzml_atts = Default::NAMESPACE.dup
mzml_atts[:version] = @version || Default::VERSION
mzml_atts[:accession] = @accession if @accession
mzml_atts[:id] = @id if @id
xml.mzML(mzml_atts) do |mzml_n|
# the 'if' statements capture whether or not the list is required or not
raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0
Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
@file_description.to_xml(mzml_n)
if @referenceable_param_groups
Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
end
if @samples && @samples.size > 0
Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
end
Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
if @scan_settings_list && @scan_settings_list.size > 0
Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
end
icl = Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
@run.to_xml(mzml_n)
end
if filename
io.close
self
else
io.string
end
end
alias_method :write, :to_xml
class ScanNumbersNotUnique < Exception
end
class ScanNumbersNotFound < Exception
end
end
end