# lib/statsample/regression/multiple/alglibengine.rb
if HAS_ALGIB
module Statsample
module Regression
module Multiple
# Class for Multiple Regression Analysis
# Requires the Alglib gem and uses a listwise approach.
# Faster than GslEngine for heavy prediction workloads, because processing is C-based.
# Prefer GslEngine if low memory use matters.
# If you need pairwise deletion, use RubyEngine.
# Example:
#
# @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
# @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
# @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
# @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
# ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
# lr=Statsample::Regression::Multiple::AlglibEngine.new(ds, :y)
#
class AlglibEngine < BaseEngine
  # Named coefficients of the fitted model (the result of passing the
  # Alglib coefficients through +assign_names+ in the constructor).
  attr_reader :coeffs

  # Fits a linear regression of +y_var+ on every other vector of +ds+.
  #
  # ds::    data set (a Daru::DataFrame in the class example). Rows holding
  #         any of Daru::MISSING_VALUES are rejected before fitting.
  # y_var:: name of the dependent vector (e.g. +:y+).
  # opts::  forwarded unchanged to BaseEngine#initialize by the bare +super+.
  #
  # NOTE(review): @y_var is read here but never assigned in this class;
  # presumably BaseEngine#initialize sets it -- confirm.
  def initialize(ds, y_var, opts = {})
    super
    @ds = ds.reject_values(*Daru::MISSING_VALUES)
    @ds_valid = @ds
    @dy = @ds[@y_var]
    # NOTE(review): built from the unfiltered +ds+, not from @ds -- confirm
    # that this is intended.
    @ds_indep = ds.dup(ds.vectors.to_a - [y_var])

    # Collect every non-dependent vector as a plain Array column,
    # remembering the field names in the same order.
    predictors = []
    @fields = []
    @ds.vectors.each do |f|
      next if f == @y_var

      predictors.push(@ds[f].to_a)
      @fields.push(f)
    end
    @dep_columns = predictors

    # The dependent variable goes last. It is converted with to_a like the
    # predictor columns, so Matrix.columns only ever receives Arrays.
    matrix = Matrix.columns(predictors + [@dy.to_a])
    @lr_s = nil
    @lr = ::Alglib::LinearRegression.build_from_matrix(matrix)
    @coeffs = assign_names(@lr.coeffs)
  end

  # Marshal hook. Only the filtered data set and the dependent variable
  # name are serialized; +opts+ and the fitted model are not.
  def _dump(i)
    Marshal.dump('ds' => @ds, 'y_var' => @y_var)
  end

  # Marshal hook. Rebuilds the engine (and therefore refits the model)
  # from the data written by +_dump+.
  def self._load(data)
    state = Marshal.load(data)
    new(state['ds'], state['y_var'])
  end

  # Coefficients using a constant
  # Based on http://www.xycoon.com/ols1.htm
  #
  # Solves (X'X)^-1 X'y with plain Matrix arithmetic, where X is the
  # predictor matrix with a leading column of ones. Returns a one-column
  # Matrix whose first row belongs to the constant term, followed by one
  # row per predictor in @dep_columns order.
  def matrix_resolution
    columns = @dep_columns.map { |xi| xi.map(&:to_f) }
    # Leading column of ones models the intercept.
    columns.unshift([1.0] * @ds.cases)
    y = Matrix.columns([@dy.data.map { |i| i.to_f }])
    x = Matrix.columns(columns)
    xt = x.t
    (xt * x).inverse * xt * y
  end

  # Squared value of +r+.
  def r2
    r**2
  end

  # Pearson correlation between the observed dependent values and +predicted+.
  def r
    Bivariate.pearson(@dy, predicted)
  end

  # Value of +ss+ for the dependent vector
  # (presumably its total sum of squares -- confirm).
  def sst
    @dy.ss
  end

  # Constant term reported by the Alglib model.
  def constant
    @lr.constant
  end

  # Named coefficients of the regression fitted on the standardized data set.
  def standarized_coeffs
    assign_names(lr_s.coeffs)
  end

  # Regression fitted on the standardized data set; built on first use.
  def lr_s
    build_standarized if @lr_s.nil?
    @lr_s
  end

  # Standardizes the data set and fits a second Alglib regression on it,
  # storing the result in @lr_s.
  def build_standarized
    @ds_s = @ds.standardize
    columns = []
    @ds_s.vectors.each do |f|
      columns.push(@ds_s[f].to_a) unless f == @y_var
    end
    @dep_columns_s = columns.dup
    # Dependent variable last, as a plain Array like the other columns.
    columns.push(@ds_s[@y_var].to_a)
    @lr_s = Alglib::LinearRegression.build_from_matrix(Matrix.columns(columns))
  end

  # Delegates +v+ to the fitted model's +process+.
  def process(v)
    @lr.process(v)
  end

  # Delegates +v+ to the standardized model's +process+.
  def process_s(v)
    lr_s.process(v)
  end

  # Residuals divided by their +sds+
  # (presumably the sample standard deviation -- confirm).
  #
  # NOTE: the result is known not to match SPSS output; the discrepancy
  # is unresolved.
  def standarized_residuals
    # Compute the residual vector once and reuse it for the deviation.
    res = residuals
    res_sd = res.sds
    Daru::Vector.new(res.collect { |v| v.quo(res_sd) })
  end
end
end
end
end # for Statsample
end # for if