SysMO-DB/seek

View on GitHub
lib/seek/data/spreadsheet_explorer_representation.rb

Summary

Maintainability
B
6 hrs
Test Coverage
require 'simple-spreadsheet-extractor'

module Seek
  module Data
    module SpreadsheetExplorerRepresentation

      include SysMODB::SpreadsheetExtractor

      MIN_ROWS = 35
      MIN_COLS = 10

      def spreadsheet_annotations
        content_blob.worksheets.collect {|w| w.cell_ranges.collect {|c| c.annotations}}.flatten
      end

      #Return the data file's spreadsheet
      #If it doesn't exist yet, it gets created
      def spreadsheet
        if content_blob.is_extractable_spreadsheet?
          workbook = parse_spreadsheet_xml(spreadsheet_xml)
          if content_blob.worksheets.empty?
            workbook.sheets.each_with_index do |sheet, sheet_number|
              content_blob.worksheets << Worksheet.create(:sheet_number => sheet_number, :last_row => sheet.last_row, :last_column => sheet.last_col)
            end
            content_blob.save
          end
          return workbook
        else
          return nil
        end
      end

      #Return the data file's spreadsheet XML
      #If it doesn't exist yet, it gets created
      def spreadsheet_xml
        if content_blob.is_extractable_spreadsheet?
          Rails.cache.fetch("#{content_blob.cache_key}-ss-xml") do
            spreadsheet_to_xml(open(content_blob.filepath))
          end
        else
          nil
        end
      end

      private

      #Takes in a string containing the xml representation of a spreadsheet and returns
      # a Workbook object
      def parse_spreadsheet_xml(spreadsheet_xml)
        workbook = Workbook.new

        doc = LibXML::XML::Parser.string(spreadsheet_xml).parse
        doc.root.namespaces.default_prefix="ss"

        doc.find("//ss:style").each do |s|
          style = Style.new(s["id"])
          s.children.each do |a|
            style.attributes[a.name] = a.content unless (a.name == "text")
          end
          workbook.styles[style.name] = style
        end


        doc.find("//ss:sheet").each do |s|
          unless s["hidden"] == "true" || s["very_hidden"] == "true"
            sheet = Sheet.new(s["name"])
            workbook.sheets << sheet
            #Load into memory
            min_rows = MIN_ROWS
            min_cols = MIN_COLS
            #Grab columns
            columns = s.find("./ss:columns/ss:column")
            col_index = 0
            #Add columns
            columns.each do |c|
              col_index = c["index"].to_i
              col = Column.new(col_index, c["width"])
              sheet.columns << col
            end
            #Pad columns (so it's at least 10 cols wide)
            if col_index < min_cols
              for i in ((col_index+1)..min_cols)
                col = Column.new(i, 2964.to_s)
                sheet.columns << col
              end
            else
              min_cols = col_index
            end
            #Grab rows
            rows = s.find("./ss:rows/ss:row")
            row_index = 0
            #Add rows
            rows.each do |r|
              row_index = r["index"].to_i
              row = Row.new(row_index, r["height"])
              sheet.rows[row_index] = row
              #Add cells
              r.find("./ss:cell").each do |c|
                col_index = c["column"].to_i
                content = c.content
                cell = Cell.new(content, row_index, col_index, c["formula"], c["style"])
                row.cells[col_index] = cell
              end
            end
            #Pad rows
            if row_index < min_rows
              for i in ((row_index + 1)..min_rows)
                row = Row.new(i, 1000.to_s)
                sheet.rows << row
              end
              min_rows = MIN_ROWS
            else
              min_rows = row_index
            end
            sheet.last_row = min_rows
            sheet.last_col = min_cols
          end
        end

        workbook
      end
      #Turns a numeric column ID into an Excel letter representation
      #eg. 1 > A, 10 > J, 28 > AB etc.
      def to_alpha(col)
        result = ""
        col = col-1
        while (col >= 0) do
          result = ((col % 26) + 65).chr + result
          col = (col / 26) - 1
        end
        result
      end

      #Does the opposite of the above
      #eg. A > 1, J > 10, AB > 28 etc.
      def from_alpha(col)
        result = 0
        col = col.split(//)
        col.each_with_index do |c,i|
          result += (c.ord - 64) * (26 ** (col.length - (i+1)))
        end
        result
      end

      #the cache file for a given feed url
      def cached_spreadsheet_path
        File.join(cached_spreadsheet_dir,"spreadsheet_blob_#{Rails.env}_#{content_blob.id.to_s}.xml")
      end

      #the directory used to contain the cached spreadsheets
      def cached_spreadsheet_dir
        if Rails.env=="test"
          dir = File.join(Dir.tmpdir,"seek-cache","spreadsheet-xml")
        else
          dir = File.join(Rails.root,"tmp","cache","spreadsheet-xml")
        end
        FileUtils.mkdir_p dir if !File.exists?(dir)
        dir
      end

      #Cache the data file's spreadsheet XML, and returns it
      def cache_spreadsheet
        xml = spreadsheet_to_xml(open(content_blob.filepath))
        File.open(cached_spreadsheet_path, "w") {|f| f.write(xml)}
        return xml
      end

      class Workbook

        attr_accessor :sheets
        attr_accessor :styles
        attr_accessor :annotations

        def initialize
          @sheets = []
          @styles = {}
          @annotations = []
        end

        def [] x
          sheet(x)
        end

        def sheet(x)
          if x.class.name == "String"
            @sheets.select {|s| s.name == x}.first
          elsif x.class.name == "Fixnum"
            @sheets[x]
          end
        end

      end

      class Style
        attr_accessor :name
        attr_accessor :attributes

        def initialize(name)
          @name = name
          @attributes = {}
        end

        def [] attribute
          @attributes[attribute]
        end

      end


      class Sheet
        attr_accessor :rows
        attr_accessor :columns
        attr_accessor :name
        attr_accessor :last_row
        attr_accessor :last_col

        def initialize(n=nil, h=0)
          @rows = []
          @columns = []
          @name = n
          @hidden = h
        end

        def row(x)
          @rows[x]
        end

        def [] x
          @rows[x]
        end

        def cell(x,y)
          @rows[x].cells[y]
        end

        def hidden?
          @hidden == 1
        end

        def very_hidden?
          @hidden == 2
        end

        #Rows with content
        def actual_rows
          @rows.select {|r| !r.nil?}
        end
      end

      class Column
        attr_accessor :index
        attr_accessor :width

        def initialize(c=nil, w=nil)
          @index = c
          @width = w unless w.blank?
        end

      end

      class Row
        attr_accessor :cells
        attr_accessor :index
        attr_accessor :height

        def initialize(r=nil, h=nil)
          @cells = []
          @index = r
          @height = h unless h.blank?
        end

        def cell(x)
          @cells[x]
        end

        def [] x
          @cells[x]
        end

        #Cells with content (present in XML - can still be blank with styles)
        def actual_cells
          @cells.select {|c| !c.nil?}
        end
      end

      class Cell
        attr_accessor :value
        attr_accessor :row
        attr_accessor :column
        attr_accessor :formula
        attr_accessor :style

        def initialize(v=nil, r=nil, c=nil, f=nil, s=nil)
          @value = v
          @row = r
          @column = c
          @formula = f
          @style = s unless s.blank?
        end

        def pretty_value
          if @value.class == Float
            return @value.round(3)
          else
            return @value
          end
        end
      end

    end
  end

end