bin/msplinter
#!/usr/bin/env ruby
require 'trollop'
require 'rubabel'
require 'rubabel/molecule/fragmentable'
require 'pry'
require 'mspire/spectrum'
require 'mspire/mzml'
# pH used for Molecule#correct_for_ph! unless overridden with --ph.
default_ph = 2.5

# One row of the fragment report printed for SMILES input; :frag keeps the
# fragment object itself so it can still be written out as an image.
Fragment = Struct.new(:frag, :id, :title, :mz, :mass, :charge, :smiles, :pairing)

progname = File.basename(__FILE__)
adduct_names = Rubabel::Molecule::ADDUCTS_LEGEND.keys.join(',')

# Command-line definition. The banner/text/opt calls are kept in their
# original order because that is the order they are declared to Trollop.
parser = Trollop::Parser.new do
  banner "usage: #{progname} [OPTIONS|RULES] <SMARTS> ..."

  text "\noptions:"
  opt :ph, "the pH to use (experimental option)", default: default_ph
  opt :images, "print out svg images of fragments"
  opt :format, "format of the molecules", default: 'smiles'
  opt :lmids, "take a list of lmids", type: :strings
  opt :mzml, "output mzml file of the fragmentation spectra"
  opt :mgf, "output mgf file of the fragmentation spectra"
  opt :randomize_intensity, "Outputs randomized intensity values in mzml"
  opt :adducts, "Accepts adducts", type: :strings
  opt :outdir, "File outputs are redirects to specified directory", type: :string
  text "Possible adducts are: #{adduct_names}"
  #opt :uniq, "no repeated fragments", :default => false

  text "\nrules:"
  # Per-rule switches are currently disabled, so this loop registers nothing.
  Rubabel::Molecule::Fragmentable::RULES.each do |_rule|
    # opt rule, rule.to_s.gsub("_",' ')
  end

  text "\nexample:"
  text " #{progname} -xeh 'CCC(=O)OCCC' 'CCC(=O)OCCC(=O)O'"
end
# Parse the command line. Wrapping the call lets Trollop handle --help,
# --version and malformed options itself (print a message and exit) instead
# of letting its control-flow exceptions escape as a backtrace.
options = Trollop.with_standard_exception_handling(parser) { parser.parse(ARGV) }

# Options forwarded to Molecule#fragment.
opts = {rules: []}
opts[:uniq] = options.delete(:uniq)
ph = options.delete(:ph)

# A rule is enabled only when its switch was explicitly given.
opts[:rules] = Rubabel::Molecule::Fragmentable::RULES.select do |rule|
  options["#{rule}_given".to_sym]
end

# parse adduct names into chemical formulas
if options[:adducts]
  options[:adducts].map! { |name| Rubabel::Molecule::ADDUCTS_LEGEND[name.to_sym] }
  # Names missing from the legend map to nil; drop them in place so later
  # code never receives a nil adduct.
  options[:adducts].compact!
end

# Nothing to work on: show the help text and stop, regardless of what
# educate happens to return.
if ARGV.empty? && options[:lmids].nil?
  parser.educate
  exit
end
# Fragment every molecule given as a positional argument and print a
# tab-separated table of the resulting fragments.
if options[:format] == 'smiles'
  columns = [:mz, :mass, :charge, :title, :smiles, :pairing]

  ARGV.each do |smiles|
    mol = Rubabel[smiles, options[:format].to_sym]
    puts "\nmolecule: #{mol.csmiles}"
    mol.correct_for_ph!(ph)
    puts "at ph #{ph}: #{mol.csmiles}"
    fragment_sets = mol.fragment(opts)
    puts columns.join("\t")

    frags = []
    fragment_sets.each_with_index do |frag_set, i|
      frag_set.each_with_index do |frag, j|
        # m/z is only defined for charged fragments; it stays nil otherwise.
        mz = (frag.mass / frag.charge).round(5) unless frag.charge == 0
        frag.title = "#{i}-#{j}pair_" + (mz ? "#{mz}_mz" : "#{frag.mass.round(3)}_Mass")
        # i is the index of the fragment set, reported in the "pairing" column.
        frags << Fragment.new(frag, frag.title, frag.title, mz, frag.exact_mass, frag.charge, frag.csmiles, i)
      end
    end

    # Highest charge first, then ascending m/z within the same charge.
    frags = frags.sort_by { |frag| [-frag.charge, frag.mz] }

    if options[:images]
      frags.each do |frag|
        fn = "#{frag.title}.svg"
        # Honour --outdir here too, as the mzML/MGF outputs do.
        fn = File.join(options[:outdir], fn) if options[:outdir]
        frag.frag.write(fn)
      end
    end

    frags.each do |frag|
      puts columns.map { |col| frag[col] }.join("\t")
    end
  end
elsif !ARGV.empty?
  # Only SMILES input is processed here; say so instead of ignoring the
  # positional arguments silently.
  warn "format '#{options[:format]}' is not supported for positional arguments; ignoring: #{ARGV.join(' ')}"
end
# For every id passed with --lmids: load the molecule, fragment it, and
# write the predicted peak list as mzML and/or MGF when requested.
if options[:lmids]
  options[:lmids].each do |lmid|
    begin
      m = Rubabel[lmid, :lmid]
    rescue => e
      # Best effort: keep going with the remaining ids, but report the skip.
      warn "skipping #{lmid}: #{e.message}"
      next
    end

    m.adducts.push(*options[:adducts]) if options[:adducts]
    mols = m.fragment
    masses = mols.flatten.map(&:mass_with_adduct).flatten.sort
    # Constant intensity of 1000 unless --randomize-intensity was given.
    intensities = Array.new(masses.size) { options[:randomize_intensity] ? rand(1000) : 1000 }

    if options[:mzml]
      outfile = lmid + '.mzML'
      outfile = File.join(options[:outdir], outfile) if options[:outdir]
      puts "writing outfile: #{outfile}"

      # The spectrum object is only needed for mzML output.
      spectra = Mspire::Mzml::Spectrum.from_arrays(0, [masses, intensities])

      file = Mspire::Mzml.new do |mzml|
        mzml.id = 'ms1_and_ms2'
        mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
        mzml.file_description = Mspire::Mzml::FileDescription.new do |fd|
          fd.file_content = Mspire::Mzml::FileContent.new
          fd.source_files << Mspire::Mzml::SourceFile.new
        end

        default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC").describe!('MS:1000031')
        mzml.instrument_configurations << default_instrument_config

        # Only one software entry is ever added to this fresh document, so a
        # plain append is enough (the old uniq_by result was discarded anyway).
        mspire = Mspire::Mzml::Software.new
        mzml.software_list << mspire

        spectra_generation = Mspire::Mzml::DataProcessing.new("msplinter fragmentation prediction") do |dp|
          # "MS:1001458" -> spectrum_generation_information
          dp.processing_methods << Mspire::Mzml::ProcessingMethod.new(mspire).describe!('MS:1001458')
        end
        mzml.data_processing_list << spectra_generation

        mzml.run = Mspire::Mzml::Run.new("artificial_run", default_instrument_config) do |run|
          run.spectrum_list = Mspire::Mzml::SpectrumList.new(spectra_generation, [spectra])
        end
      end
      file.write(outfile)
    end #mzml

    if options[:mgf]
      outfile = lmid + '.mgf'
      outfile = File.join(options[:outdir], outfile) if options[:outdir]
      puts "writing outfile: #{outfile}"
      File.open(outfile, 'w') do |io|
        io.puts "BEGIN IONS"
        io.puts "TITLE=#{lmid}"
        # NOTE(review): the fragment masses above are flattened after calling
        # mass_with_adduct, which suggests it may return a list; confirm that
        # PEPMASS is formatted as intended here.
        io.puts "PEPMASS=#{m.mass_with_adduct}"
        io.puts "PEAK_COUNT=#{masses.size}"
        io.puts "COMMENT=#{lmid}"
        # One "mass intensity" pair per line.
        masses.zip(intensities).each { |peak| io.puts peak.join(" ") }
        io.puts "END IONS"
      end
    end #MGF
  end #LMID loop
end