# lib/regression.rb
module Silicium
module Regression
class LinearRegressionByGradientDescent
  # Fits the line theta0 + theta1 * x to the given data points by
  # batch gradient descent, stopping once both parameters change by
  # less than an internal epsilon (1e-6) in a single step.
  #
  # @param plot Hash mapping x => y for the sample points
  # @param alpha [Numeric] learning rate (should be small enough not to diverge)
  # @param start_theta0 [Numeric] initial guess for theta0
  # @param start_theta1 [Numeric] initial guess for theta1
  # @return [Array(Numeric, Numeric)] the fitted theta0, theta1
  def self.generate_function(plot, alpha = 0.01, start_theta0 = 0.0, start_theta1 = 0.0)
    theta0 = start_theta0
    theta1 = start_theta1
    epsilon = 0.000001
    # Loop-invariant step factor: alpha scaled by the sample count.
    step = alpha / plot.length.to_f
    loop do
      prev0 = theta0
      prev1 = theta1
      # Both partial derivatives are evaluated at the PREVIOUS point,
      # so the theta1 update must not see the freshly updated theta0.
      theta0 -= step * d_dt_for_theta0(plot, prev0, prev1)
      theta1 -= step * d_dt_for_theta1(plot, prev0, prev1)
      break if [(theta0 - prev0).abs, (theta1 - prev1).abs].max <= epsilon
    end
    [theta0, theta1]
  end
  private
  # Partial derivative of the (un-normalized) squared-error cost w.r.t. theta0:
  # sum of residuals over all points.
  def self.d_dt_for_theta0(plot, theta0, theta1)
    plot.sum { |x, y| theta0 + theta1 * x - y }
  end
  # Partial derivative of the (un-normalized) squared-error cost w.r.t. theta1:
  # sum of residuals weighted by x.
  def self.d_dt_for_theta1(plot, theta0, theta1)
    plot.sum { |x, y| (theta0 + theta1 * x - y) * x }
  end
end
# Finds coefficients as an array indexed [0..n] for a polynomial of n-th degree
# @param plot Actually hash x => y for different points of the plot
# @param n Degree of the expected polynomial
# @param alpha Speed of learning (should be small enough not to diverge)
# @param epsilon Accuracy
# @return Array[Numeric] coefficients (lower index means lower-degree term)
class PolynomialRegressionByGradientDescent
# Fits a degree-n polynomial to the data by batch gradient descent.
# For n > 3 the y values are shifted and scaled first (see feature_scaled_plot)
# to keep the descent numerically tame, and the coefficients are mapped back
# afterwards.
# NOTE(review): the inverse map below adds avg_x to EVERY coefficient, but an
# inverse of y' = (y - avg_x) / div would scale all coefficients by div and add
# avg_x only to the constant term — verify the n > 3 path against known data.
# NOTE(review): the scaling path indexes given_plot[0][0], so it assumes the
# plot is an array of [x, y] pairs rather than a Hash — confirm with callers.
def self.generate_function(given_plot, n = 5, alpha = 0.001, epsilon = 0.000001)
scaling = n > 3
if scaling
plot, avg_x, div = feature_scaled_plot(given_plot, n)
else
(plot = given_plot)
end
# Start from all-ones coefficients; n + 1 terms for degrees 0..n.
array = Array.new(n + 1, 1)
m = plot.length.to_f
# bias: last observed step size; old_bias: previous step size (divergence guard).
bias = old_bias = 0.5
array = calculate([bias, epsilon, array, alpha, plot, old_bias, m])
return array.map! {|x| x * div + avg_x } if scaling
array
end
# NOTE(review): `private` has no effect on singleton methods defined with
# `def self.`; the helpers below remain publicly callable.
private
# Runs the descent until the largest per-coefficient change drops below epsilon.
# params is a positional bundle:
#   [0] current step size  [1] epsilon       [2] coefficient array
#   [3] alpha              [4] plot          [5] previous step size
#   [6] m (sample count as Float)
# Raises RuntimeError("Divergence") if the step size ever grows between
# iterations, i.e. alpha is too large.
def self.calculate(params)
while params[0].abs() > params[1]
old_array = params[2].dup()
i = -1
# map! updates coefficients in place; i tracks the coefficient index
# so each element uses its own partial derivative.
params[2].map! { |elem|
i += 1
elem - params[3] / params[6] * d_dt(params[4], old_array, i)
}
# Step size = largest absolute change across all coefficients.
params[0] = (params[2].zip(old_array).map {|new, old| (new - old).abs()}).max()
raise "Divergence" if params[0] > params[5]
params[5] = params[0]
end
params[2]
end
# Partial derivative of the squared-error cost w.r.t. coefficient i,
# summed over all points.
# NOTE(review): the derivative of (func(theta, x) - y)^2 / 2 w.r.t. theta_i is
# (func - y) * x**i; the extra (i + 1) factor acts as a per-coefficient step
# scaling. It preserves the fixed point (scaled gradient is zero iff the true
# gradient is) but confirm it is intentional.
def self.d_dt(plot, old_array, i)
sum = 0.0
plot.each { |x, y| sum += (func(old_array, x) - y) * (i + 1) * (x ** i) }
sum
end
# Evaluates the polynomial with the given coefficients at x
# (coefficient index == degree).
def self.func(array, x)
sum = 0.0
i = 0
array.each do |elem|
sum += elem * (x ** i)
i += 1
end
sum
end
# Builds a transformed plot for the n > 3 path: each y is shifted by the mean
# of the x values and divided by (range of x) ** n.
# NOTE(review): despite the name, this scales the y values using statistics of
# the x values, and x itself is left untouched — verify this is the intended
# scheme.
# @return [Array(Array, Numeric, Numeric)] new_plot, avg_x, div
def self.feature_scaled_plot(given_plot, n)
max_x = given_plot[0][0]
min_x = given_plot[0][0]
sum_x = 0.0
given_plot.each do |x, _|
max_x = x if x > max_x
min_x = x if x < min_x
sum_x += x
end
avg_x = sum_x.to_f / given_plot.length
range_x = max_x - min_x
div = range_x ** n
new_plot = given_plot.map {|x, y| [x, (y.to_f - avg_x) / div]}
return new_plot, avg_x, div
end
end
end
end