SciRuby/statsample

View on GitHub
lib/statsample/regression/multiple/alglibengine.rb

Summary

Maintainability
A
0 mins
Test Coverage
if HAS_ALGIB
module Statsample
module Regression
module Multiple
# Class for Multiple Regression Analysis
# Requires the Alglib gem and uses a listwise approach: rows containing any of
# Daru::MISSING_VALUES are rejected before the model is built.
# Faster than GslEngine on massive prediction use, because the process is C-based.
# Prefer GslEngine if you need good memory use.
# If you need pairwise, use RubyEngine
# Example:
#
#   @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
#   @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
#   @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
#   @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
#   ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
#   lr=Statsample::Regression::Multiple::AlglibEngine.new(ds, :y)
#
class AlglibEngine < BaseEngine
  # Coefficients of the fitted model, as produced by +assign_names+ from the
  # raw Alglib coefficients.
  attr_reader :coeffs

  # Builds the regression model.
  #
  # ds    :: data frame holding the dependent and the independent vectors
  # y_var :: name of the dependent vector inside +ds+
  # opts  :: options, forwarded untouched to BaseEngine#initialize
  def initialize(ds, y_var, opts = Hash.new)
    super
    @ds       = ds.reject_values(*Daru::MISSING_VALUES)
    @ds_valid = @ds
    @dy       = @ds[@y_var]
    # Built from the unfiltered data frame, exactly as received.
    @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
    # Independent vector names, and their values as plain arrays, in
    # data-frame order.
    @fields      = @ds.vectors.to_a.reject { |f| f == @y_var }
    @dep_columns = @fields.map { |f| @ds[f].to_a }
    @lr_s   = nil # standardized model, built lazily by #lr_s
    @lr     = ::Alglib::LinearRegression.build_from_matrix(alglib_matrix(@dep_columns, @dy))
    @coeffs = assign_names(@lr.coeffs)
  end

  # Marshal hook. Only the (already filtered) data frame and the name of the
  # dependent variable are serialized; the model is rebuilt on load.
  # The depth argument +i+ supplied by Marshal is not used.
  def _dump(i)
    Marshal.dump({'ds'=>@ds,'y_var'=>@y_var})
  end

  # Marshal hook: rebuilds the engine from the payload written by #_dump.
  # NOTE(review): Marshal.load must only ever be fed trusted data.
  def self._load(data)
    h = Marshal.load(data)
    new(h['ds'], h['y_var'])
  end

  # Coefficients using a constant
  # Based on http://www.xycoon.com/ols1.htm
  #
  # Solves (X'X)^-1 X'y with the stdlib Matrix class, where X is the matrix
  # of independent columns preceded by a column of ones. Returns a
  # one-column Matrix whose first row corresponds to the constant.
  def matrix_resolution
    columns = @dep_columns.map { |xi| xi.map(&:to_f) }
    # Column of ones for the constant term, one entry per valid row.
    columns.unshift([1.0] * @dy.size)
    y  = Matrix.columns([@dy.to_a.map(&:to_f)])
    x  = Matrix.columns(columns)
    xt = x.t
    (xt * x).inverse * xt * y
  end

  # Squared multiple correlation.
  def r2
    r**2
  end

  # Pearson correlation between the observed and the predicted values of
  # the dependent variable.
  def r
    Bivariate.pearson(@dy, predicted)
  end

  # Total sum of squares of the dependent variable.
  def sst
    @dy.ss
  end

  # Constant (intercept) reported by the Alglib model.
  def constant
    @lr.constant
  end

  # Coefficients of the model fitted on the standardized data frame,
  # labelled through +assign_names+.
  def standarized_coeffs
    assign_names(lr_s.coeffs)
  end

  # Alglib model fitted on the standardized data; built on first access.
  def lr_s
    build_standarized if @lr_s.nil?
    @lr_s
  end

  # Standardizes the data frame and fits a second Alglib model on it.
  # Stores the result in @lr_s and returns it.
  def build_standarized
    @ds_s = @ds.standardize
    @dep_columns_s = @ds_s.vectors.to_a
                          .reject { |f| f == @y_var }
                          .map { |f| @ds_s[f].to_a }
    @lr_s = ::Alglib::LinearRegression.build_from_matrix(alglib_matrix(@dep_columns_s, @ds_s[@y_var]))
  end

  # Delegates +v+ to the Alglib model's +process+.
  def process(v)
    @lr.process(v)
  end

  # Same as #process, but on the standardized model.
  def process_s(v)
    lr_s.process(v)
  end

  # Residuals divided by their sample standard deviation.
  # NOTE: the original author flagged this as not equal to SPSS output.
  def standarized_residuals
    res    = residuals
    res_sd = res.sds
    Daru::Vector.new(res.collect { |v| v.quo(res_sd) })
  end

  private

  # Matrix expected by Alglib::LinearRegression.build_from_matrix: one column
  # per independent variable, followed by the dependent variable as the last
  # column. The dependent vector is converted with +to_a+ so that every
  # column handed to Matrix.columns is a plain Array.
  def alglib_matrix(indep_columns, dep_vector)
    Matrix.columns(indep_columns + [dep_vector.to_a])
  end
end
end
end
end # for Statsample
end # for if