SciRuby/statsample

View on GitHub
lib/statsample/regression/multiple/rubyengine.rb

Summary

Maintainability
A
1 hr
Test Coverage
module Statsample
module Regression
module Multiple
# Pure Ruby Class for Multiple Regression Analysis.
# Slower than AlglibEngine, but is pure ruby and can use a pairwise aproach for missing values. 
# Coeffient calculation uses correlation matrix between the vectors
# If you need listwise aproach for missing values, use AlglibEngine, because is faster.
# 
# Example:
#
#   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
#   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
#   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
#   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
#   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
#   lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)

class RubyEngine < MatrixEngine
  def initialize(ds,y_var, opts=Hash.new)
    matrix = Statsample::Bivariate.correlation_matrix ds
    fields_indep=ds.vectors.to_a - [y_var]
    default= {
      :y_mean => ds[y_var].mean,
      :x_mean => fields_indep.inject({}) {|ac,f|  ac[f]=ds[f].mean; ac},
      :y_sd   => ds[y_var].sd,
      :x_sd   => fields_indep.inject({}) {|ac,f|  ac[f]=ds[f].sd; ac},
      :cases  => Statsample::Bivariate.min_n_valid(ds)
    }
    opts = opts.merge(default)
    super(matrix, y_var, opts)
    @ds = ds
    @dy = ds[@y_var]
    @ds_valid = ds.reject_values(*Daru::MISSING_VALUES)
    @total_cases = @ds.nrows
    @valid_cases = @ds_valid.nrows
    @ds_indep    = ds.dup(ds.vectors.to_a - [y_var])
    set_dep_columns
  end
  
  def set_dep_columns
    @dep_columns = []
    @ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }                
  end

  def fix_with_mean
    i=0
    @ds_indep.each(:row) do |row|
      empty=[]
      row.each do |k,v|
        empty.push(k) if v.nil?
      end

      if empty.size==1
        @ds_indep[empty[0]][i]=@ds[empty[0]].mean
      end
      i += 1
    end
    set_dep_columns
  end
  def fix_with_regression
    i = 0
    @ds_indep.each(:row) do |row|
      empty = []
      row.each { |k,v| empty.push(k) if v.nil? }
      if empty.size==1
        field  = empty[0]
        lr     = MultipleRegression.new(@ds_indep,field)
        fields = []
        @ds_indep.vectors.each { |f|
          fields.push(row[f]) unless f == field
        }
        
        @ds_indep[field][i]=lr.process(fields)
      end
      i+=1
    end
    set_dep_columns
  end
  # Standard error for constant
  def constant_se
    estimated_variance_covariance_matrix[0,0]
  end
end
end
end
end