SciRuby/gnuplotrb

View on GitHub
lib/gnuplotrb/fit.rb

Summary

Maintainability
A
35 mins
Test Coverage
module GnuplotRB
  ##
  # Contains methods relating to Gnuplot's fit function. Covered in
  # {fit notebook}[http://nbviewer.ipython.org/github/dilcom/gnuplotrb/blob/master/notebooks/fitting_data.ipynb].
  #
  # You can also see original gnuplot's fit in
  # {gnuplot doc}[http://www.gnuplot.info/docs_5.0/gnuplot.pdf] p. 122.
  module Fit
    ##
    # Fit given data with function.
    #
    # Fit waits for output from gnuplot Settings.max_fit_delay and throw exception if gets nothing.
    # One can change this value in order to wait longer (if huge datasets is fitted).
    #
    # @param data [#to_gnuplot_points] method accepts the same sources as Dataset.new
    #   and Dataset object
    # @param :function [String] function to fit data with
    # @param :initials [Hash] initial values for coefficients used in fitting
    # @param :term_options [Hash] terminal options that should be setted to terminal before fit.
    #   You can see them in Plot's documentation (or even better in gnuplot doc)
    #   Most useful here are ranges (xrange, yrange etc) and fit option which tunes fit parameters
    #   (see {gnuplot doc}[http://www.gnuplot.info/docs_5.0/gnuplot.pdf] p. 122)
    # @param options [Hash] options passed to Gnuplot's fit such as *using*. They are covered in
    #   {gnuplot doc}[http://www.gnuplot.info/docs_5.0/gnuplot.pdf] (pp. 69-74)
    #
    # @return [Hash] hash with four elements:
    #   - :formula_ds - dataset with best fit curve as data
    #   - :coefficients - hash of calculated coefficients. So if you gave
    #     ``{ initials: {a: 1, b: 1, c: 1} }`` it will return hash with keys :a, :b, :c and its values
    #   - :deltas - Gnuplot calculates possible deltas for coefficients during fitting and
    #     deltas hash contains this deltas
    #   - :data - pointer to Datablock with given data
    # @example
    #   fit(some_data, function: 'exp(a/x)', initials: {a: 10}, term_option: { xrange: 1..100 })
    #   fit(some_dataset, using: '1:2:3')
    def fit(data, function: 'a2*x*x+a1*x+a0', initials: { a2: 1, a1: 1, a0: 1 }, term_options: {}, **options)
      dataset = data.is_a?(Dataset) ? Dataset.new(data.data) : Dataset.new(data)
      opts_str = OptionHandling.ruby_class_to_gnuplot(options)
      output = gnuplot_fit(function, dataset, opts_str, initials, term_options)
      res = parse_output(initials.keys, function, output)
      {
        formula_ds: Dataset.new(res[2], title: 'Fit formula'),
        coefficients: res[0],
        deltas: res[1],
        data: dataset
      }
    end

    ##
    # Shortcut for fit with polynomial. Degree here is max power of *x* in polynomial.
    #
    # @param data [#to_gnuplot_points] method accepts the same sources as Dataset.new
    #   and Dataset object
    # @param :degree [Integer] degree of polynomial
    # @param options [Hash] all of this options will be passed to #fit so you
    #   can set here any options listed in its docs. If you pass here :initials hash, it
    #   will be merged into default initals hash. Formula by default is
    #   "xn*x**n + ... + x0*x**0", initials by default "{ an: 1, ..., a0: 1 }"
    #
    # @return [Hash] hash with four elements:
    #   - :formula_ds - dataset with best fit curve as data
    #   - :coefficients - hash of calculated coefficients. So for degree = 3
    #     it will return hash with keys :a3, :a2, :a1, :a0 and calculated values
    #   - :deltas - Gnuplot calculates possible deltas for coefficients during fitting and
    #     deltas hash contains this deltas
    #   - :data - pointer to Datablock with given data
    # @example
    #   fit_poly(some_data, degree: 5, initials: { a4: 10, a2: -1 }, term_option: { xrange: 1..100 })
    #   #=> The same as:
    #   #=> fit(
    #   #=>   some_data,
    #   #=>   function: 'a5*x**5 + a4*x**4 + ... + a0*x**0',
    #   #=>   initals: {a5: 1, a4: 10, a3: 1, a2: -1, a1: 1, a0: 1},
    #   #=>   term_option: { xrange: 1..100 }
    #   #=> )
    def fit_poly(data, degree: 2, **options)
      sum_count = degree + 1
      initials = {}
      sum_count.times { |i| initials["a#{i}".to_sym] = 1 }
      options[:initials] = initials.merge(options[:initials] || {})
      function = sum_count.times.map { |i| "a#{i}*x**#{i}" }.join(' + ')
      fit(data, **options, function: function)
    end

    ##
    # @!method fit_exp(data, **options)
    # @!method fit_log(data, **options)
    # @!method fit_sin(data, **options)
    #
    # Shortcuts for fitting with several math functions (exp, log, sin).
    #
    # @param data [#to_gnuplot_points] method accepts the same sources as Dataset.new
    #   and Dataset object
    # @param options [Hash] all of this options will be passed to #fit so you
    #   can set here any options listed in its docs. If you pass here :initials hash, it
    #   will be merged into default initals hash. Formula by default is
    #   "yscale * (yoffset + #{function name}((x - xoffset) / xscale))", initials by default
    #   "{ yoffset: 0.1, xoffset: 0.1, yscale: 1, xscale: 1 }"
    #
    # @return [Hash] hash with four elements:
    #   - :formula_ds - dataset with best fit curve as data
    #   - :coefficients - hash of calculated coefficients. So for this case
    #     it will return hash with keys :yoffset, :xoffset, :yscale, :xscale and calculated values
    #   - :deltas - Gnuplot calculates possible deltas for coefficients during fitting and
    #     deltas hash contains this deltas
    #   - :data - pointer to Datablock with given data
    #
    # @example
    #   fit_exp(some_data, initials: { yoffset: -11 }, term_option: { xrange: 1..100 })
    #   #=> The same as:
    #   #=> fit(
    #   #=>   some_data,
    #   #=>   function: 'yscale * (yoffset + exp((x - xoffset) / xscale))',
    #   #=>   initals: { yoffset: -11, xoffset: 0.1, yscale: 1, xscale: 1 },
    #   #=>   term_option: { xrange: 1..100 }
    #   #=> )
    #   fit_log(...)
    #   fit_sin(...)
    %w(exp log sin).map do |fname|
      define_method("fit_#{fname}".to_sym) do |data, **options|
        options[:initials] = {
          yoffset: 0.1,
          xoffset: 0.1,
          yscale: 1,
          xscale: 1
        }.merge(options[:initials] || {})
        function = "yscale * (yoffset + #{fname} ((x - xoffset) / xscale))"
        fit(data, **options, function: function)
      end
    end

    private

    ##
    # It takes some time to produce output so here we need
    # to wait for it.
    #
    # Max time to wait is stored in Settings.max_fit_delay, so one
    # can change it in order to wait longer.
    def wait_for_output(term, variables)
      # now we should catch 'error' from terminal: it will contain approximation data
      # but we can get a real error instead of output, so lets wait for limited time
      start = Time.now
      output = ''
      until output_ready?(output, variables)
        begin
          term.check_errors(raw: true)
        rescue GnuplotRB::GnuplotError => e
          output += e.message
        end
        if Time.now - start > Settings.max_fit_delay
          fail GnuplotError, "Seems like there is an error in gnuplotrb: #{output}"
        end
      end
      output
    end

    ##
    # Check if current output contains all the
    # variables given to fit.
    def output_ready?(output, variables)
      output =~ /Final set .*#{variables.join('.*')}/
    end

    ##
    # Parse Gnuplot's output to get coefficients and their deltas
    # from it. Also replaces coefficients in given function with
    # exact values.
    def parse_output(variables, function, output)
      plottable_function = " #{function.clone} "
      coefficients = {}
      deltas = {}
      variables.each do |var|
        value, error = output.scan(%r{#{var} *= ([^ ]+) *\+/\- ([^ ]+)})[0]
        plottable_function.gsub!(/#{var}([^0-9a-zA-Z])/) { value + Regexp.last_match(1) }
        coefficients[var] = value.to_f
        deltas[var] = error.to_f
      end
      [coefficients, deltas, plottable_function]
    end

    ##
    # Make fit command and send it to gnuplot
    def gnuplot_fit(function, data, options, initials, term_options)
      variables = initials.keys
      term = Terminal.new
      term.set(term_options)
      initials.each { |var_name, value| term.stream_puts "#{var_name} = #{value}" }
      command = "fit #{function} #{data.to_s(term, without_options: true)} " \
                "#{options} via #{variables.join(',')}"
      term.stream_puts(command)
      output = wait_for_output(term, variables)
      begin
        term.close
      rescue GnuplotError
        # Nothing interesting here.
        # If we had an error, we never reach this line.
        # Error here may be only additional information
        # such as correlation matrix.
      end
      output
    end
  end
end