jonas-lantto/table_transform

View on GitHub
lib/table_transform/table.rb

Summary

Maintainability
B
5 hrs
Test Coverage
require 'csv'
require_relative 'properties'

module TableTransform
  module Util
    def self.get_col_index(col_name, data)
      data[col_name] || (raise "No column with name '#{col_name}' exists")
    end
  end

  class Table
    attr_reader :formulas
    attr_reader :table_properties
    attr_reader :column_properties

    def self.create_from_file(file_name, sep = ',')
      rows = CSV.read(file_name, { :col_sep => sep })
      raise "'#{file_name}' contains no data" if rows.empty?

      Table.new(rows)
    end

    def self.create_empty(header, table_properties = {})
      raise 'Table header need to be array' unless header.is_a? Array
      raise 'Table, No header defined' if header.empty?
      Table.new([header], table_properties)
    end

    # @throws if column names not unique
    # @throws if column size for each row match
    def initialize(rows, table_properties = {})
      raise 'Table required to have at least a header row' if (rows.nil? or rows.empty?)

      @data_rows = rows.clone
      header = @data_rows.shift
      @column_indexes = create_column_name_binding(header)
      @formulas = {}
      @table_properties = TableProperties.new(table_properties)
      @column_properties = Hash.new{|_hash, key| raise "No column with name '#{key}' exists"}
      create_column_properties(*header,{})

      validate_header_uniqueness(header)
      validate_column_size
    end

    # Sets metadata for given columns
    # Example:
    #  set_metadata('Col1', {format: '#,##0'})
    def set_metadata(*columns, metadata)
      warn 'set_metadata is deprecated. Use column_properties[] instead'
      columns.each{|c| @column_properties[c].reset(metadata)}
    end

    # Returns meta data as Hash with header name as key
    def metadata
      warn 'metadata is deprecated. Use column_properties[] instead'
      @column_properties.inject({}){|res, (k, v)| res.merge!({k => v.to_h})}
    end

    def add_column_formula(column, formula, column_properties = {})
      add_column(column, column_properties){nil}
      @formulas[column] = formula
      self # self chaining
    end

    def << (hash_values)
      @data_rows << create_row(hash_values)
      self
    end

    # Add two tables
    # @throws if header or properties do not match
    def +(table)
      t2 = table.to_a
      t2_header = t2.shift
      raise 'Tables cannot be added due to header mismatch' unless @column_properties.keys == t2_header
      raise 'Tables cannot be added due to column properties mismatch' unless column_properties_eql? table.column_properties
      raise 'Tables cannot be added due to table properties mismatch' unless @table_properties.to_h == table.table_properties.to_h
      TableTransform::Table.new(self.to_a + t2)
    end

    def each_row
      @data_rows.each{|x|
        yield Row.new(@column_indexes, x)
      }
    end

    # @returns array of data arrays including header row
    def to_a
      res = @data_rows.clone
      res.unshift @column_properties.keys.clone
    end

    # @returns new table with specified columns specified in given header
    def extract(header)
      validate_column_exist(*header)
      selected_cols = @column_indexes.values_at(*header)
      t = Table.new( @data_rows.inject([header]) {|res, row| (res << row.values_at(*selected_cols))}, @table_properties.to_h )
      header.each{|h| t.column_properties[h].reset(@column_properties[h].to_h)}
      t.formulas = header.zip(@formulas.values_at(*header)).to_h
      t
    end

    # @returns new table with rows that match given block
    def filter
      table_prop = @table_properties.to_h
      col_prop   = @column_properties.keys.clone
      t = Table.new( (@data_rows.select {|row| yield Row.new(@column_indexes, row)}.unshift col_prop), table_prop )
      t.formulas = @formulas.clone
      t.column_properties = @column_properties.clone
      t
    end

    #adds a column with given name to the far right of the table
    #@throws if given column name already exists
    def add_column(name, column_properties = {})
      validate_column_absence(name)
      create_column_properties(name, column_properties)
      @data_rows.each{|x|
        x << (yield Row.new(@column_indexes, x))
      }
      @column_indexes[name] = @column_indexes.size
      self # enable chaining
    end

    def change_column(name)
      raise "Column with formula('#{name}') cannot be changed" if @formulas[name]
      index = Util::get_col_index(name, @column_indexes)
      @data_rows.each{|r|
        r[index] = yield Row.new(@column_indexes, r)
      }

      self # enable chaining
    end

    def delete_column(*names)
      validate_column_exist(*names)
      names.each{|n|
        @column_properties.delete(n)
        @formulas.delete(n)
      }

      selected_cols = @column_indexes.values_at(*@column_properties.keys)
      @data_rows.map!{|row| row.values_at(*selected_cols)}

      @column_indexes = create_column_name_binding(@column_properties.keys)
      self
    end

    def rename_column(from, to)
      validate_column_exist(from)
      validate_column_absence(to)

      @column_properties = @column_properties.map{|k,v| [k == from ? to : k, v] }.to_h
      @formulas = @formulas.map{|k,v| [k == from ? to : k, v] }.to_h
      @column_indexes = create_column_name_binding(@column_properties.keys)
    end

    # Table row
    # Columns within row can be referenced by name, e.g. row['name']
    class Row

      def initialize(cols, row)
        @cols = cols #column name and index in row
        @row  = row  #Specific row data
      end

      # @returns row value with column name or empty string if it does not exist
      # @throws exception if column name does not exist
      def [](column_name)
        index = Util::get_col_index(column_name, @cols)
        Cell.new @row[ index ].to_s || ''
      end

    end

    # Cell within Table::Row
    class Cell < String
      # @returns true if this cell includes any of the given values in list
      def include_any?(list)
        list.inject(false){|res, x| res | (self.include? x)}
      end
    end

    # Table properties
    class TableProperties < TableTransform::Properties
      def validate(properties)
        super
        properties.each { |k, v|
          case k
            when :name
              raise "Table property '#{k}' expected to be a non-empty string" unless v.is_a?(String) && !v.empty?
            when :auto_filter
              raise "Table property '#{k}' expected to be a boolean" unless !!v == v
            else
              raise "Table property unknown '#{k}'"
          end
        }
      end
    end

    # Column properties
    class ColumnProperties < TableTransform::Properties
      def validate(properties)
        super
        properties.each { |k, v|
          case k
            when :format
              raise "Column property 'format' expected to be a non-empty string" unless v.is_a?(String) && !v.empty?
            else
              raise "Unknown column property '#{k}'"
          end
        }
      end
    end


    protected
      attr_writer :formulas
      attr_writer :column_properties

    private
      def create_column_name_binding(header_row)
        header_row.map.with_index{ |x, i| [x, i] }.to_h
      end

      def create_row(hash_values)
        @column_properties.keys.inject([]) { |row, col| row << hash_values.fetch(col){raise "Value for column '#{col}' could not be found"} }
      end

      def create_column_properties(*header, properties)
        header.each{|key| @column_properties.store(key, TableTransform::Table::ColumnProperties.new(properties))}
      end

      def column_properties_eql?(column_properties)
          return false unless @column_properties.size == column_properties.size
          @column_properties.each{|key, prop| return false unless prop.to_h == column_properties[key].to_h}
      end

      # @throws unless all header names are unique
      def validate_header_uniqueness(header)
        dup = header.select{ |e| header.count(e) > 1 }.uniq
        raise "Column(s) not unique: #{dup.map{|x| "'#{x}'"}.join(', ')}" if dup.size > 0
      end

      # @throws unless all rows have same number of elements
      def validate_column_size
        @data_rows.each_with_index {|x, index| raise "Column size mismatch. On row #{index+1}. Size #{x.size} expected to be #{@column_properties.size}" if @column_properties.size != x.size}
      end

      def validate_column_exist(*names)
        diff = names - @column_properties.keys
        raise raise "No column with name '#{diff.first}' exists" if diff.size > 0
      end

      def validate_column_absence(name)
        raise "Column '#{name}' already exists" if @column_properties.keys.include?(name)
      end

  end
end