SciRuby/statsample

View on GitHub
lib/statsample/graph/boxplot.rb

Summary

Maintainability
C
1 day
Test Coverage
require 'rubyvis'
module Statsample
  module Graph
    # = Boxplot
    # 
    # From Wikipedia:
    # In descriptive statistics, a box plot or boxplot (also known as a box-and-whisker diagram or plot) is a convenient way of graphically depicting groups of numerical data through their five-number summaries: the smallest observation (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3), and largest observation (sample maximum). A boxplot may also indicate which observations, if any, might be considered outliers.
    # 
    # == Usage
    # === Svg output
    #  a = Daru::Vector.new([1,2,3,4])
    #  b = Daru::Vector.new([3,4,5,6])
    #  puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
    # === Using ReportBuilder
    #  a = Daru::Vector.new([1,2,3,4])
    #  b = Daru::Vector.new([3,4,5,6])
    #  rb=ReportBuilder.new
    #  rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
    #  rb.save_html('boxplot.html')
    
    class Boxplot
      include Summarizable
      attr_accessor :name
      # Total width of Boxplot
      attr_accessor :width
      # Total height of Boxplot
      attr_accessor :height
      # Top margin
      attr_accessor :margin_top
      # Bottom margin
      attr_accessor :margin_bottom
      # Left margin
      attr_accessor :margin_left
      # Right margin
      attr_accessor :margin_right
      # Array assigning each vector to a group.
      # For example, for four vectors,
      #   boxplot.groups=[1,2,1,3]
      # gives the first and third boxes the same color, and different
      # colors to the second and fourth.
      attr_accessor :groups
      # Minimum value on y-axis. Automatically defined from data
      attr_accessor :minimum
      # Maximum value on y-axis. Automatically defined from data
      attr_accessor :maximum
      # Vectors to be plotted, one box per vector
      attr_accessor :vectors
      # The rotation angle, in radians. Text is rotated clockwise relative
      # to the anchor location. For example, with the default left alignment,
      # an angle of Math::PI / 2 causes text to proceed downwards. The default angle is zero.
      attr_accessor :label_angle
      attr_reader :x_scale, :y_scale
      # Create a new Boxplot.
      # Parameters: Hash of options
      # * :vectors: Array of vectors (required)
      # * :groups: Array of the same size as :vectors, with the group name
      #           of each vector; vectors in the same group share a color
      #
      # The hash passed in by the caller is never modified, so the same
      # options hash can be reused to build several plots.
      #
      # Raises RuntimeError when :vectors is missing or nil.
      def initialize(opts=Hash.new)
        # Work on a shallow copy: removing :vectors below must not leak
        # into the hash owned by the caller (and must work on frozen hashes).
        opts=opts.dup
        @vectors=opts.delete :vectors
        raise "You should define vectors" if @vectors.nil?

        opts_default={
          :name=>_("Boxplot"),
          :groups=>nil,
          :width=>400,
          :height=>300,
          :margin_top=>10,
          :margin_bottom=>20,
          :margin_left=>20,
          :margin_right=>20,
          :minimum=>nil,
          :maximum=>nil,
          :label_angle=>0
        }
        @opts=opts_default.merge(opts)
        # Only the known options are pushed through their setters; any
        # unknown key supplied by the caller simply stays in @opts.
        opts_default.each_key {|k| send("#{k}=", @opts[k]) }
      end
      
      # Returns a Rubyvis panel with the boxplot
      #
      # Builds the Rubyvis::Panel that draws one box per vector: the box
      # spans the 25th..75th percentiles, a 2px rule marks the median,
      # whiskers reach the most extreme values inside the fences and the
      # remaining values are drawn as dots.
      #
      # As a side effect the scales are stored, so the +x_scale+ and
      # +y_scale+ readers are populated once the panel has been built.
      def rubyvis_panel # :nodoc:
        that=self

        # Y-axis limits: user-supplied bounds win, otherwise they are
        # taken from the data.
        y_min=@minimum || @vectors.map {|v| v.min}.min
        y_max=@maximum || @vectors.map {|v| v.max}.max

        margin_hor=margin_left + margin_right
        margin_vert=margin_top  + margin_bottom
        # Local copies are kept on purpose: the parameterless DSL blocks
        # below call +data+, +bottom+, ... without a receiver, i.e. they do
        # not run with this Boxplot as self, so only locals are reachable.
        x_scale=@x_scale=pv.Scale.ordinal((0...@vectors.size).to_a).split_banded(0, width-margin_hor, 4.0/5)
        y_scale=@y_scale=Rubyvis::Scale.linear(y_min,y_max).range(0,height-margin_vert)
        y_scale.nice

        colors=Rubyvis::Colors.category10

        # Cache the per-vector summaries used by every mark below
        data=@vectors.map {|v| boxplot_stats(v) }

        Rubyvis::Panel.new do |pan|
          pan.width  width  - margin_hor
          pan.height height - margin_vert
          pan.bottom margin_bottom
          pan.left   margin_left
          pan.right  margin_right
          pan.top    margin_top
          # Y axis: one rule per tick, the zero line drawn in black
          pan.rule do
            data y_scale.ticks
            bottom y_scale
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'left') do
              text y_scale.tick_format
            end
          end
          # Baseline
          pan.rule do
            bottom 0
            stroke_style 'black'
          end

          # Labels: vector names below the baseline

          pan.label  do |l|
            l.data data
            l.text_angle that.label_angle
            l.left  {|v| x_scale[index] }
            l.bottom(-15)
            l.text {|v,x| v[:name]}
          end

          # One sub-panel per vector, positioned by the ordinal x scale
          pan.panel do |bp|
            bp.data data
            bp.left {|v|  x_scale[index]}
            bp.width x_scale.range_band

            # Box: from the 25th to the 75th percentile
            bp.bar do |b|
              b.bottom {|v| y_scale[v[:percentil_25]]}
              b.height {|v| y_scale[v[:percentil_75]] - y_scale[v[:percentil_25]] }
              b.line_width 1
              # Color by group when groups are given, by position otherwise
              b.stroke_style  {|v|
                if that.groups
                  colors.scale(that.groups[parent.index]).darker
                else
                  colors.scale(index).darker
                end
              }
              b.fill_style {|v|
                if that.groups
                  colors.scale(that.groups[parent.index])
                else
                  colors.scale(index)
                end
              }
            end
            # Median
            bp.rule do |r|
              r.bottom {|v| y_scale[v[:median]]}
              r.width x_scale.range_band
              r.line_width 2
            end
            ##
            # Whiskers
            ##
            # Low whisker: horizontal cap, hidden when it coincides with the box
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}
            end

            # Low whisker: vertical stem from the cap up to the box
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker])}
            end
            # High whisker: horizontal cap, hidden when it coincides with the box

            bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:high_whisker])}
            end

            # High whisker: vertical stem from the box up to the cap
            bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:percentil_75])}
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75])}
            end
            # Outliers: every value beyond the whiskers, centered on the box
            bp.dot do |dot|
              dot.shape_size 4
              dot.data {|v| v[:outliers]}
              dot.left {|v| x_scale.range_band / 2.0}
              dot.bottom {|v| y_scale.scale(v)}
              dot.title {|v| v}
            end
          end
        end
      end

      # Summary of a single vector as the Hash consumed by rubyvis_panel:
      # :percentil_25, :median, :percentil_75, :iqr, :low_whisker,
      # :high_whisker, :outliers and :name.
      def boxplot_stats(v) # :nodoc:
        out={:percentil_25=>v.percentil(25), :median=>v.median, :percentil_75=>v.percentil(75), :name=>v.name}
        out[:iqr]=out[:percentil_75] - out[:percentil_25]

        # Fences sit one IQR beyond the quartiles.
        # NOTE(review): the usual Tukey convention is 1.5 * IQR; the factor
        # is left at 1 here so existing plots do not change.
        fence_high=out[:percentil_75] + out[:iqr]
        fence_low=out[:percentil_25] - out[:iqr]

        # Whiskers end at the most extreme values strictly inside the fences
        low=out[:percentil_25]
        high=out[:percentil_75]
        v.each {|d|
          low=d if d < low && d > fence_low
          high=d if d > high && d < fence_high
        }
        out[:low_whisker]=low
        out[:high_whisker]=high
        # Everything beyond the whiskers is reported as an outlier
        out[:outliers]=v.to_a.select {|d| d < low || d > high }
        out
      end
      private :boxplot_stats
      
      # Returns SVG with the boxplot
      def to_svg
        # Build the panel, render it, then serialize the rendered panel.
        rubyvis_panel.tap {|panel| panel.render }.to_svg
      end
      def report_building(builder) # :nodoc:
        # Adds one section, titled with the plot name, holding the plot as
        # an SVG image sized like the plot itself.
        builder.section(:name => name) { |section|
          svg = to_svg
          section.image(svg, :type => 'svg', :width => width, :height => height)
        }
      end
    end
  end
end