# SciRuby/statsample

lib/statsample/histogram.rb

### Summary

A
1 hr
###### Test Coverage
``````module Statsample
# A histogram consists of a set of bins which count the
# number of events falling into a given range of a continuous variable x.
#
# This implementations follows convention of GSL
# for specification.
#
#  * Verbatim: *
#
#  The range for bin[i] is given by range[i] to range[i+1].
#  For n bins there are n+1 entries in the array range.
#  Each bin is inclusive at the lower end and exclusive at the upper end.
#  Mathematically this means that the bins are defined
#  by the following inequality,
#
#   bin[i] corresponds to range[i] <= x < range[i+1]
#
#  Here is a diagram of the correspondence between ranges and bins
#  on the number-line for x,
#
#
#      [ bin[0] )[ bin[1] )[ bin[2] )[ bin[3] )[ bin[4] )
#   ---|---------|---------|---------|---------|---------|---  x
#    r[0]      r[1]      r[2]      r[3]      r[4]      r[5]
#
#
#  In this picture the values of the range array are denoted by r.
#  On the left-hand side of each bin the square bracket ‘[’ denotes
#  an inclusive lower bound ( r <= x), and the round parentheses ‘)’
#  on the right-hand side denote an exclusive upper bound (x < r).
#  Thus any samples which fall on the upper end of the histogram are
#  excluded.
#  If you want to include this value for the last bin you will need to
#
#
# == Reference:
# * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html

class Histogram
include Enumerable

class << self
# Alloc +n_bins+, using +range+ as ranges of bins
def alloc(n_bins, range=nil, opts=Hash.new)
Histogram.new(n_bins, range, opts)

end
# Alloc +n_bins+ bins, using +p1+ as minimum and +p2+
# as maximum
def alloc_uniform(n_bins, p1=nil,p2=nil)
if p1.is_a? Array
min,max=p1
else
min,max=p1,p2
end
range=max - min
step=range / n_bins.to_f
range=(n_bins+1).times.map {|i| min + (step*i)}
Histogram.new(range)
end
end

attr_accessor :name

include GetText
bindtextdomain("statsample")

def initialize(p1, min_max=false, opts=Hash.new)

if p1.is_a? Array
range=p1
@n_bins=p1.size-1
elsif p1.is_a? Integer
@n_bins=p1
end

@bin=[0.0]*(@n_bins)
if(min_max)
min, max=min_max[0], min_max[1]
range=Array.new(@n_bins+1)
(@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
end
range||=[0.0]*(@n_bins+1)
set_ranges(range)
@name=""
opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}
end

# Number of bins
def bins
@n_bins
end

def increment(x, w=1)
if x.respond_to? :each
x.each{|y| increment(y,w) }
elsif x.is_a? Numeric
(range.size - 1).times do |i|
if x >= range[i] and x < range[i+1]
@bin[i] += w
break
end
end
end
end

def set_ranges(range)
raise "Range size should be bin+1" if range.size!=@bin.size+1
@range=range
end

def get_range(i)
[@range[i],@range[i+1]]
end

def max
@range.last
end

def min
@range.first
end
def max_val
@bin.max
end
def min_val
@bin.min
end
def each
bins.times.each do |i|
r=get_range(i)
arg={:i=>i, :low=>r[0],:high=>r[1], :middle=>(r[0]+r[1]) / 2.0,  :value=>@bin[i]}
yield arg
end
end
def estimated_variance
sum,n=0,0
mean=estimated_mean
each do |v|
sum+=v[:value]*(v[:middle]-mean)**2
n+=v[:value]
end
sum / (n-1)
end
def estimated_standard_deviation
Math::sqrt(estimated_variance)
end
def estimated_mean
sum,n=0,0
each do |v|
sum+= v[:value]* v[:middle]
n+=v[:value]
end
sum / n
end
alias :mean :estimated_mean
alias :sigma :estimated_standard_deviation

def sum(start=nil,_end=nil)
start||=0
_end||=@n_bins-1
(start.._end).inject(0) {|ac,i| ac+@bin[i]}
end
def report_building(generator)
hg=Statsample::Graph::Histogram.new(self)
generator.parse_element(hg)
end
def report_building_text(generator)
@range.each_with_index do |r,i|
next if i==@bin.size
generator.text(sprintf("%5.2f : %d", r, @bin[i]))
end
end
end
end``````