SciRuby/statsample

View on GitHub
lib/statsample/converters.rb

Summary

Maintainability
B
4 hrs
Test Coverage
require 'statsample/converter/spss'
module Statsample
  # Create and dumps Datasets on a database
  # 
  # == NOTE
  # 
  # Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
  module Database
    class << self
      # Read a database query and returns a Dataset
      #
      # == NOTE
      # 
      # Deprecated. Use Daru::DataFrame.from_sql instead.
      def read(dbh,query)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
      end

      # Insert each case of the Dataset on the selected table
      #
      # == NOTE
      # 
      # Deprecated. Use Daru::DataFrame#write_sql instead
      def insert(ds, dbh, table)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
      end
      # Create a sql, basen on a given Dataset
      #
      # == NOTE
      # 
      # Deprecated. Use Daru::DataFrame#create_sql instead.
      def create_sql(ds,table,charset="UTF8")
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
      end
    end
  end
  module Mondrian
    class << self
      def write(dataset,filename)
        File.open(filename,"wb") do |fp|
          fp.puts dataset.vectors.to_a.join("\t")
          dataset.each_row do |row|
            row2 = row.map { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
            fp.puts row2.join("\t")
          end
        end
      end
    end
  end

  class PlainText
    class << self
      def read(filename, fields)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
      end
    end
  end
    
  # This class has been DEPRECATED. Use Daru::DataFrame::from_excel 
  # Daru::DataFrame#write_excel for XLS file operations.
  class Excel
    class << self
      # Write a Excel spreadsheet based on a dataset
      # * TODO: Format nicely date values
      # 
      # == NOTE
      # 
      # Deprecated. Use Daru::DataFrame#write_csv.
      def write(dataset,filename)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
      end

      # Returns a dataset based on a xls file
      # 
      # == NOTE
      # 
      # Deprecated. Use Daru::DataFrame.from_excel instead.
      def read(filename, opts=Hash.new)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
      end
    end
  end

  module Mx
    class << self
      def write(dataset,filename,type=:covariance)
        puts "Writing MX File"
        File.open(filename,"w") do |fp|
          fp.puts "! #{filename}"
          fp.puts "! Output generated by Statsample"
          fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
          fp.puts "Labels " + dataset.vectors.to_a.join(" ")
          case type
            when :raw
            fp.puts "Rectangular"
            dataset.each do |row|
              out=dataset.vectors.to_a.collect do |f|
                if dataset[f].is_valid? row[f]
                  row[f]
                else
                  "."
                end
              end
              fp.puts out.join("\t")
            end
            fp.puts "End Rectangular"
          when :covariance
            fp.puts " CMatrix Full"
            cm=Statsample::Bivariate.covariance_matrix(dataset)
            d=(0...(cm.row_size)).collect {|row|
              (0...(cm.column_size)).collect{|col|
                cm[row,col].nil? ? "." : sprintf("%0.3f", cm[row,col])
              }.join(" ")
            }.join("\n")
            fp.puts d
          end
        end
      end
    end
  end
    module GGobi
        class << self
      def write(dataset,filename,opt={})
        File.open(filename,"w") {|fp|
          fp.write(self.out(dataset,opt))
        }
      end
            def out(dataset,opt={})
                require 'ostruct'
                default_opt = {:dataname => "Default", :description=>"", :missing=>"NA"}
                default_opt.merge! opt
                carrier=OpenStruct.new
                carrier.categorials=[]
                carrier.conversions={}
                variables_def=dataset.vectors.to_a.collect{|k|
                    variable_definition(carrier,dataset[k],k)
                }.join("\n")

                indexes=carrier.categorials.inject({}) {|s,c|
                    s[dataset.vectors.to_a.index(c)]=c
                    s
                }
                records=""
                dataset.each_row {|c|
                    indexes.each { |ik,iv|
                        c[ik] = carrier.conversions[iv][c[ik]]
                    }
                    records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
                }

out=<<EOC
<?xml version="1.0"?>
<!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
<ggobidata count="1">
<data name="#{default_opt[:dataname]}">
<description>#{default_opt[:description]}</description>
<variables count="#{dataset.fields.size}">
#{variables_def}
</variables>
    <records count="#{dataset.cases}" missingValue="#{default_opt[:missing]}">
#{records}
</records>

</data>
</ggobidata>
EOC

out

            end
      def values_definition(c,missing)
        c.collect{|v|
          if v.nil?
            "#{missing}"
          elsif v.is_a? Numeric
            "#{v}"
          else
            "#{v.gsub(/\s+/,"_")}"
          end
        }.join(" ")
      end
            # Outputs a string for a variable definition
            # v = vector
            # name = name of the variable
            # nickname = nickname
            def variable_definition(carrier,v,name,nickname=nil)
                nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
                if v.type==:object or v.to_a.find {|d|  d.is_a? String }
                    carrier.categorials.push(name)
                    carrier.conversions[name]={}
                    factors=v.factors
                    out ="<categoricalvariable name=\"#{name}\" #{nickname}>\n"
                    out << "<levels count=\"#{factors.size}\">\n"
                    out << (1..factors.size).to_a.collect{|i|
                        carrier.conversions[name][factors[i-1]]=i
                        "<level value=\"#{i}\">#{(v.labels[factors[i-1]] || factors[i-1])}</level>"
                    }.join("\n")
                    out << "</levels>\n</categoricalvariable>\n"
                    out
                elsif v.to_a.find {|d| d.is_a? Float}
                    "<realvariable name=\"#{name}\" #{nickname} />"
                else
                    "<integervariable name=\"#{name}\" #{nickname} />"
                end
            end
        end
    end
end

require 'statsample/converter/csv.rb'