SciRuby/statsample

View on GitHub
lib/statsample/reliability/scaleanalysis.rb

Summary

Maintainability
C
1 day
Test Coverage
module Statsample
  module Reliability
    # Analysis of a scale. Analogous to the Scale Reliability analysis in SPSS.
    # Returns several statistics for complete scale and each item
    # == Usage
    #  @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
    #  @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
    #  @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
    #  @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
    #  ds  = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
    #  ia  = Statsample::Reliability::ScaleAnalysis.new(ds)
    #  puts ia.summary
    class ScaleAnalysis
      include Summarizable
      attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
      attr_accessor :name
      attr_accessor :summary_histogram
      # Builds the analysis for the items (vectors) of +ds+ and computes every
      # scale-level statistic once, up front.
      #
      # Vectors of +ds+ whose variance equals 0 are recorded in @dumped and left
      # out of the working dataset (@ds), which is derived from +ds+ via
      # <tt>reject_values(*Daru::MISSING_VALUES)</tt>. The original dataset is
      # kept in @ods.
      #
      # ds::   dataset to analyze (e.g. a Daru::DataFrame)
      # opts:: options hash, merged over the defaults +:name+
      #        ("Reliability Analysis") and +:summary_histogram+ (true).
      #        Only keys that have a matching writer on this object are applied;
      #        every other key is ignored.
      def initialize(ds, opts=Hash.new)
        # Constant items; report_building lists them in a separate section.
        @dumped = ds.vectors.to_a.find_all { |f| ds[f].variance == 0 }

        @ods = ds
        @ds  = ds.reject_values(*Daru::MISSING_VALUES).dup(ds.vectors.to_a - @dumped)
        @ds.rename ds.name

        @k       = @ds.ncols
        @total   = @ds.vector_sum
        # Sum over the original dataset, only kept when some item was dropped.
        @o_total = @dumped.empty? ? nil : @ods.vector_sum

        @vector_mean = @ds.vector_mean
        @item_mean   = @vector_mean.mean
        @item_sd     = @vector_mean.sd

        @mean     = @total.mean
        @median   = @total.median
        @skew     = @total.skew
        @kurtosis = @total.kurtosis
        @sd       = @total.sd
        @variance = @total.variance
        @valid_n  = @total.size

        opts_default = {
          :name => _("Reliability Analysis"),
          :summary_histogram => true
        }
        @opts = opts_default.merge(opts)
        # Check for the *writer*, since that is what gets called: testing the
        # reader would raise NoMethodError for read-only attributes such as
        # :mean or :alpha instead of ignoring them like any other unknown key.
        @opts.each do |key, value|
          writer = "#{key}="
          send(writer, value) if respond_to?(writer)
        end

        @cov_m = Statsample::Bivariate.covariance_matrix(@ds)
        # Item variances are read from the diagonal of the covariance matrix.
        @variances      = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
        @variances_mean = @variances.mean
        # Variance of the total minus the item variances, averaged over the
        # k**2 - k off-diagonal cells.
        @covariances_mean = (@variance - @variances.sum).quo(@k**2 - @k)

        @alpha             = Statsample::Reliability.cronbach_alpha(@ds)
        @alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
      end
      # Empirical item characteristic curves.
      #
      # Returns a hash with structure
      #   { item => { total_score => mean of the item among the cases
      #                              having that total score } }
      # where +total_score+ is the case's entry in @total.
      def item_characteristic_curve
        items  = @ds.vectors.to_a
        sums   = {}           # item => { total score => sum of item values }
        counts = Hash.new(0)  # total score => number of cases

        # Iterate over *rows*: a bare DataFrame#each walks the vector axis.
        # Rows are matched to their total by index label rather than by a
        # running counter, so gaps left by rejected cases cannot misalign them.
        # NOTE(review): relies on @total (built as @ds.vector_sum) carrying the
        # same index as @ds - confirm.
        @ds.each_row_with_index do |row, label|
          score = @total[label]
          counts[score] += 1
          items.each do |item|
            by_score = (sums[item] ||= Hash.new(0))
            by_score[score] += row[item]
          end
        end

        # Build plain hashes (no default values) holding the means.
        sums.each_with_object({}) do |(item, by_score), curve|
          curve[item] = by_score.each_with_object({}) do |(score, sum), means|
            means[score] = sum.quo(counts[score])
          end
        end
      end
      # =Adjusted item-total correlation for each item
      # Each item is correlated (Pearson) with the sum of all the *other*
      # items, i.e. the total with the item itself left out.
      #
      # Returns a hash {item => correlation}. The result is memoized.
      def item_total_correlation
        items = @ds.vectors.to_a
        @itc ||= items.each_with_object({}) do |item, correlations|
          rest_score = @ds.vector_sum(items - [item])
          correlations[item] = Statsample::Bivariate.pearson(@ds[item], rest_score)
        end
      end
      # Mean of the per-item correlations returned by item_total_correlation.
      def mean_rpb
        correlations = item_total_correlation.values
        Daru::Vector.new(correlations).mean
      end
      # Per-item descriptive statistics.
      #
      # Returns a hash {item => {:mean => item mean, :sds => square root of the
      # item's variance as reported by the covariance matrix}}. Memoized.
      def item_statistics
        @is ||= @ds.vectors.to_a.each_with_object({}) do |item, stats|
          stats[item] = {
            :mean => @ds[item].mean,
            :sds  => Math.sqrt(@cov_m.variance(item))
          }
        end
      end
      # Returns a new dataset with one row per case, where
      # * cases are ordered by ascending score (the case's entry in
      #   @ds.vector_mean), and
      # * item columns are ordered by descending item mean.
      #
      # Columns are :case (the case's index in the working dataset), :score,
      # and then one column per item.
      def item_difficulty_analysis
        item_means = @ds.vectors.to_a.each_with_object({}) do |item, means|
          means[item] = @ds[item].mean
        end
        # Highest mean first.
        items_by_mean = item_means.sort { |x, y| -(x[1] <=> y[1]) }

        case_scores = @ds.vector_mean
        score_of = {}
        case_scores.each_index { |idx| score_of[idx] = case_scores[idx] }
        # Lowest score first.
        cases_by_score = score_of.sort { |x, y| x[1] <=> y[1] }

        columns = [:case, :score] + items_by_mean.map { |item, _mean| item.to_sym }
        report  = Daru::DataFrame.new({}, order: columns)
        cases_by_score.each do |idx, score|
          case_values = @ds.row[idx].to_h
          item_cells  = items_by_mean.map { |item, _mean| case_values[item] }
          report.add_row([idx, score] + item_cells)
        end
        report
      end
      
      # Per-item "if item deleted" statistics, computed on first use by
      # stats_if_deleted_intern and cached afterwards.
      def stats_if_deleted
        @sif = stats_if_deleted_intern unless @sif
        @sif
      end
      
      # Computes, for every item, the scale statistics obtained when that item
      # is removed, working from the covariance matrix of the remaining items.
      #
      # Returns {} when the working dataset has a single item; otherwise a hash
      #   {item => {:mean, :variance_sample, :sds, :alpha}}
      # where :alpha is nil when fewer than two items remain.
      def stats_if_deleted_intern # :nodoc:
        return {} if @ds.ncols == 1

        items = @ds.vectors.to_a
        items.each_with_object({}) do |item, stats|
          remaining_cov = @cov_m.submatrix(items - [item])

          mean_without = @mean - item_statistics[item][:mean]
          # Variance of the reduced total: sum of all cells of its covariance matrix.
          variance_without = remaining_cov.total_sum
          sd_without = Math.sqrt(variance_without)
          alpha_without =
            if remaining_cov.row_size >= 2
              Statsample::Reliability.cronbach_alpha_from_covariance_matrix(remaining_cov)
            end

          stats[item] = {
            :mean            => mean_without,
            :variance_sample => variance_without,
            :sds             => sd_without,
            :alpha           => alpha_without
          }
        end
      end
      # Writes the reliability report into +builder+.
      #
      # Emits, in order:
      # * only when zero-variance items were dropped: a section with a summary
      #   of the complete data, the list of deleted items and (optionally) a
      #   histogram of the complete totals;
      # * the scale summary table;
      # * the number of items needed to reach alpha 0.8 and 0.9 (only when
      #   alpha is available);
      # * the per-item table;
      # * when +summary_histogram+ is set, a histogram of the totals.
      #
      # builder:: report builder; +section+ is called on it, and +section+,
      #           +table+, +text+ and +parse_element+ on the object it yields.
      def report_building(builder) #:nodoc:
        builder.section(:name=>@name) do |s|
          if @dumped.size>0
            # NOTE(review): s1 is unused; the nested content is emitted through
            # the outer +s+. Confirm both refer to the same builder before
            # switching to s1.
            s.section(:name=>"Items with variance=0") do |s1|
              s.table(:name=>_("Summary for %s with all items") % @name) do |t|
                t.row [_("Items"), @ods.ncols]
                t.row [_("Sum mean"),  "%0.4f" % @o_total.mean]
                t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
              end
              s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
                @dumped.each do |f|
                  t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
                end
              end
              s.parse_element(Statsample::Graph::Histogram.new(@o_total, :name=>"Histogram (complete data) for %s" % @name)) if @summary_histogram
            end
          end

          s.table(:name=>_("Summary for %s") % @name) do |t|
            t.row [_("Valid Items"), @ds.ncols]
            t.row [_("Valid cases"), @valid_n]
            t.row [_("Sum mean"),    "%0.4f" % @mean]
            t.row [_("Sum sd"),      "%0.4f" % @sd]
            t.row [_("Sum median"),  @median]
            t.hr
            t.row [_("Item mean"),   "%0.4f" % @item_mean]
            t.row [_("Item sd"),     "%0.4f" % @item_sd]
            t.hr
            t.row [_("Skewness"),    "%0.4f" % @skew]
            t.row [_("Kurtosis"),    "%0.4f" % @kurtosis]
            t.hr
            t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
            t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--"]
            t.row [_("Mean rpb"), "%0.4f" % mean_rpb]

            t.row [_("Variances mean"),   "%g" % @variances_mean]
            t.row [_("Covariances mean"), "%g" % @covariances_mean]
          end

          if @alpha
            # Translate the template first and interpolate afterwards; looking
            # up the already-formatted string would never match a catalog entry.
            s.text(_("Items for obtain alpha(0.8) : %d") % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
            s.text(_("Items for obtain alpha(0.9) : %d") % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
          end

          sid = stats_if_deleted
          is  = item_statistics
          itc = item_total_correlation

          s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
            @ds.vectors.each do |f|
              # Every statistic that is unavailable for the item is shown as "-".
              row = ["#{@ds[f].name}(#{f})"]
              row += if is[f]
                       [sprintf("%0.5f", is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
                     else
                       ["-", "-"]
                     end
              row += if sid[f]
                       [sprintf("%0.5f", sid[f][:mean]), sprintf("%0.5f", sid[f][:variance_sample]), sprintf("%0.5f", sid[f][:sds])]
                     else
                       %w{- - -}
                     end
              row += [itc[f] ? sprintf("%0.5f", itc[f]) : '-']
              row += if sid[f] && !sid[f][:alpha].nil?
                       [sprintf("%0.5f", sid[f][:alpha])]
                     else
                       ["-"]
                     end
              t.row row
            end
          end
          s.parse_element(Statsample::Graph::Histogram.new(@total, :name=>"Histogram (valid data) for %s" % @name)) if @summary_histogram
        end
      end
    end # class
  end # module
end # module