SpeciesFileGroup/taxonworks
lib/export/csv.rb

# Code that translates scopes into downloadable tab-delimited CSV. Dependent on PostgreSQL.
#
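# A minimal end-to-end sketch (the scope and output path are hypothetical):
#
#   data = ::Export::CSV.generate_csv(CollectionObject.where(project_id: 1))
#   File.write('/tmp/collection_objects.tsv', data)
#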
module ::Export::CSV

  # Translate a scope into a tab-delimited table, with optional tweaks to the data.
  #
  # This is a very nice reference for future consideration:
  #   http://collectiveidea.com/blog/archives/2015/03/05/optimizing-rails-for-memory-usage-part-3-pluck-and-database-laziness
  # @param [Scope] scope
  # @param [Array<String>] exclude_columns
  #   column names to drop from the output
  # @param [Array] header_converters
  #   passed to ::CSV.new when the header row is parsed
  # @param [Boolean] trim_rows
  #   when true, skip rows in which every value is blank
  # @param [Boolean] trim_columns
  #   when true, drop columns in which every value is blank (slow, prefer exclude_columns)
  # @param [Array] column_order
  #   preferred ordering of the column headers; columns not listed are placed at the front
  # @return [String]
  #   the tab-delimited data, header row first
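  #
  # A minimal usage sketch, assuming a model scope such as CollectionObject (adjust to your data):
  # @example
  #   data = ::Export::CSV.generate_csv(
  #     CollectionObject.where(project_id: 1),
  #     exclude_columns: %w{created_by_id updated_by_id},
  #     column_order: %w{id total},
  #     trim_rows: true
  #   )
  #   # => a String of tab-delimited rows, header row first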
  def self.generate_csv(scope, exclude_columns: [], header_converters: [], trim_rows: false, trim_columns: false, column_order: [])

    column_names = scope.columns_hash.keys
    column_names = sort_column_headers(column_names, column_order.map(&:to_s)) if column_order.any?

    # Run the column names through ::CSV so that header_converters are applied
    # (converters are a read-time feature, see the note below).
    h = ::CSV.new(column_names.join(','), header_converters:, headers: true)
    h.read

    headers = ::CSV::Row.new(h.headers, h.headers, true).headers

    # Build the table column-wise: each entry is [header, value, value, ...].
    tbl = headers.map { |header| [header] }

    # pluck_rows is provided by the postgresql_cursor gem
    #puts Rainbow('preparing data: ' + (Benchmark.measure do
    scope.pluck_rows(*column_names).each do |o|
      o.each_with_index do |value, index|
        tbl[index] << Utilities::Strings.sanitize_for_csv(value)
      end
      # If keys are not deterministic: .attributes.values_at(*column_names).collect{|v| Utilities::Strings.sanitize_for_csv(v) }
    end
    # end).to_s).yellow

    if !exclude_columns.empty?
      # puts Rainbow('deleting columns: ' + (Benchmark.measure {
      delete_columns(tbl, exclude_columns)
      # }).to_s).yellow
    end

    if trim_columns
      # puts Rainbow('trimming columns: ' + (Benchmark.measure {
      trim_columns(tbl)
      # }).to_s).yellow
    end

    # CSV::Converters are only available on read, not generate, so we can't use them here.
    output = StringIO.new
    # puts Rainbow('generating CSV: '+ (Benchmark.measure do
    # Emit row by row; row 0 is the header row.
    (0..tbl.first.length - 1).each do |row_index|
      row = tbl.collect { |c| c[row_index] }
      if trim_rows
        next unless row.detect { |c| c.present? } # Minimize checks by exiting ASAP.  Could benchmark vs. row.compact.blank?
      end
      output.puts ::CSV.generate_line(row, col_sep: "\t", encoding: Encoding::UTF_8)
    end
    # end).to_s).yellow

    output.string
  end

  # @param tbl [Array]
  #   the column-wise table ([header, values...] per entry)
  # @param columns [Array]
  #   names (or symbols) of the columns to delete
  # @return [Array]
  #   the table with the specified columns removed
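  #
  # A minimal sketch of the expected column-wise table (hypothetical data):
  # @example
  #   tbl = [['id', 1, 2], ['name', 'a', 'b']]
  #   ::Export::CSV.delete_columns(tbl, ['name'])
  #   # => [['id', 1, 2]]   (tbl is modified in place)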
  def self.delete_columns(tbl, columns = [])
    columns.each do |col|
      headers = tbl.collect { |c| c.first }
      if index = headers.index(col.to_s)
        tbl.delete_at(index)
      end
    end
    tbl
  end

  # @param table [Array]
  #   the column-wise table ([header, values...] per entry)
  # @return [Array]
  #   remove columns without any non-#blank? values (of course doing this in the scope is better!)
  #   This is very slow; prefer a proper scope and the exclude_columns: [] option instead.
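  #
  # A minimal sketch (hypothetical data); the all-blank column is removed in place:
  # @example
  #   tbl = [['id', 1, 2], ['notes', nil, '']]
  #   ::Export::CSV.trim_columns(tbl)
  #   # => [['id', 1, 2]]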
  def self.trim_columns(table)
    to_delete = []

    table.each_with_index do |col, index|
      # With no initial value #inject seeds the memo with the header (col.first),
      # so only data values are yielded; break true as soon as one is present.
      to_delete << index unless col.inject { |_, c| break true if c.present? }
    end

    # Each deletion shifts the later indices left by one, hence the x - i offset.
    to_delete.each_with_index { |x, i| table.delete_at(x - i) }
    table
  end

  # Sort order for columns.
  #   Columns not in column_order are added at the *front* of the file.
  # @param column_names [Array]
  # @param column_order [Array]
  # @return [Array]
  #   the re-ordered column names
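  #
  # A minimal sketch (hypothetical column names):
  # @example
  #   ::Export::CSV.sort_column_headers(%w{id notes total}, %w{total id})
  #   # => ['notes', 'total', 'id']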
  def self.sort_column_headers(column_names = [], column_order = [])
    sorted = []
    unsorted = []
    column_names.each do |n|
      if p = column_order.index(n)
        sorted[p] = n
      else
        unsorted.push n
      end
    end

    # #compact guards against column_order entries that are not present in column_names
    unsorted + sorted.compact
  end

end