EGI-FCTF/rOCCI-core

View on GitHub
lib/occi/core/parsers/text_parser.rb

Summary

Maintainability
A
0 mins
Test Coverage
module Occi
  module Core
    module Parsers
      # Namespace for text-related parser classes and modules, i.e. the
      # building blocks that turn basic text-based renderings into internal
      # instances. It is declared empty here, ahead of the `require` calls
      # that load the files from the `text` directory.
      #
      # @author Boris Parak <parak@cesnet.cz>
      module Text; end
    end
  end
end

# Load class-specific parsing primitives. `constants` is required explicitly before
# everything else; the glob below then picks up the remaining files (re-requiring
# `constants` is a no-op).
require File.join(File.dirname(__FILE__), 'text', 'constants')
# Strip only the trailing extension: a global `gsub('.rb', '')` also rewrites directory
# names that contain `.rb` (e.g. an installation under `~/.rbenv/`), which makes `require`
# look for a path that does not exist. Sorting keeps the load order deterministic on
# Ruby versions where `Dir[]` returns entries in file-system order.
Dir[File.join(File.dirname(__FILE__), 'text', '*.rb')].sort.each { |file| require file.chomp('.rb') }

module Occi
  module Core
    module Parsers
      # Implements components necessary to parse all required instance types
      # from `text` or `text`-like formats.
      #
      # @author Boris Parak <parak@cesnet.cz>
      class TextParser < BaseParser
        # Media types grouped by where the rendering lives (URI list, headers, plain body)
        URI_LIST_TYPES     = ['text/uri-list'].freeze
        HEADERS_TEXT_TYPES = ['text/occi'].freeze
        PLAIN_TEXT_TYPES   = ['text/plain', 'text/occi+plain'].freeze
        OCCI_TEXT_TYPES    = (HEADERS_TEXT_TYPES + PLAIN_TEXT_TYPES).freeze
        MEDIA_TYPES        = (URI_LIST_TYPES + OCCI_TEXT_TYPES).freeze

        # Accepted spellings of OCCI keys, one group per key; `KEY_NAME_GROUPS` lists the
        # group constants by name and the first member of each group is the canonical key
        KEY_NAME_GROUPS = %w[LOCATION_KEYS CATEGORY_KEYS LINK_KEYS ATTRIBUTE_KEYS].freeze
        LOCATION_KEYS   = %w[X-OCCI-Location X_occi_location X-occi-location Location].freeze
        CATEGORY_KEYS   = %w[Category X-OCCI-Category X_occi_category X-occi-category].freeze
        LINK_KEYS       = %w[Link X-OCCI-Link X_occi_link X-occi-link].freeze
        ATTRIBUTE_KEYS  = %w[X-OCCI-Attribute X_occi_attribute X-occi-attribute].freeze
        OCCI_KEYS       = (LOCATION_KEYS + CATEGORY_KEYS + LINK_KEYS + ATTRIBUTE_KEYS).freeze

        # Prefix removed from raw header names during normalization
        HEADER_HTTP_PREFIX = 'HTTP_'.freeze

        # Parses entities from the given body/headers. Only kinds, mixins, and actions already declared
        # in the model are allowed.
        #
        # @param body [String] raw `String`-like body as provided by the transport protocol
        # @param headers [Hash] raw headers as provided by the transport protocol
        # @param expectation [Class] expected class of the returned instance(s)
        # @return [Set] set of instances
        def entities(body, headers, expectation = nil)
          expectation ||= Occi::Core::Entity
          logger.debug "Parsing #{expectation} from #{body.inspect} and #{headers.inspect}" if logger_debug?

          # Body/headers are reduced to plain-text lines and parsed against the known model
          parser = Text::Entity.new(model: model)
          parsed = parser.plain(transform(body, headers))
          return setify(parsed) if parsed.is_a?(expectation)

          # Anything other than the expected class is rejected
          raise Occi::Core::Errors::ParsingError, "Entity is not of type #{expectation}"
        end

        # Parses action instances from the given body/headers. Only actions already declared in the model are
        # allowed.
        #
        # @param body [String] raw `String`-like body as provided by the transport protocol
        # @param headers [Hash] raw headers as provided by the transport protocol
        # @return [Set] set of parsed instances
        def action_instances(body, headers)
          logger.debug "Parsing Occi::Core::ActionInstance from #{body.inspect} and #{headers.inspect}" if logger_debug?
          parser = Text::Entity.new(model: model)
          lines = transform(body, headers)

          # The action comes from the rendered categories, attributes are filled in afterwards
          action = action_instance_category(parser, lines)
          instance = Occi::Core::ActionInstance.new(action: action)
          parser.plain_attributes!(lines, instance.attributes)

          setify(instance)
        end

        # Parses categories from the given body/headers and returns corresponding instances
        # from the known model.
        #
        # @param body [String] raw `String`-like body as provided by the transport protocol
        # @param headers [Hash] raw headers as provided by the transport protocol
        # @param expectation [Class] expected class of the returned instance(s)
        # @return [Set] set of instances
        def categories(body, headers, expectation = nil)
          expectation ||= Occi::Core::Category
          logger.debug "Parsing #{expectation} from #{body.inspect} and #{headers.inspect}" if logger_debug?

          # Each line is parsed and immediately resolved via `lookup`, one line at a time
          found = []
          transform(body, headers).each do |line|
            parsed = Text::Category.plain_category(line, false)
            found << lookup("#{parsed[:scheme]}#{parsed[:term]}", expectation)
          end

          setify(found)
        end

        # Transforms `body` and `headers` into an array of lines parsable by 'text/plain'
        # parser.
        #
        # @param body [String] raw `String`-like body as provided by the transport protocol
        # @param headers [Hash] raw headers as provided by the transport protocol
        # @return [Array] array with transformed lines
        def transform(body, headers)
          parser_class = self.class
          # Header-based media types carry the rendering in headers, everything else in the body
          return parser_class.transform_headers(headers) if HEADERS_TEXT_TYPES.include?(media_type)

          parser_class.transform_body(body)
        end
        private :transform

        class << self
          # Extracts categories from body and headers. For details, see `Occi::Core::Parsers::Text::Category`.
          #
          # @param body [String] raw `String`-like body as provided by the transport protocol
          # @param headers [Hash] raw headers as provided by the transport protocol
          # @param media_type [String] media type string as provided by the transport protocol
          # @param model [Occi::Core::Model] `Model`-like instance to be populated (may contain existing categories)
          # @return [Occi::Core::Model] model instance filled with parsed categories
          def model(body, headers, media_type, model)
            if logger_debug?
              logger.debug "Parsing model from #{media_type.inspect} in #{body.inspect} and #{headers.inspect}"
            end

            # Only header-based and plain-text media types can carry a model
            from_headers = HEADERS_TEXT_TYPES.include?(media_type)
            unless from_headers || PLAIN_TEXT_TYPES.include?(media_type)
              raise Occi::Core::Errors::ParsingError, "Model cannot be parsed from #{media_type.inspect}"
            end

            lines = from_headers ? transform_headers(headers) : transform_body(body)
            Text::Category.plain(lines, model)
          end

          # Extracts URI-like locations from body and headers. For details, see `Occi::Core::Parsers::Text::Location`.
          #
          # @param body [String] raw `String`-like body as provided by the transport protocol
          # @param headers [Hash] raw headers as provided by the transport protocol
          # @param media_type [String] media type string as provided by the transport protocol
          # @return [Array] list of extracted URIs
          def locations(body, headers, media_type)
            if logger_debug?
              logger.debug "Parsing locations from #{media_type.inspect} in #{body.inspect} and #{headers.inspect}"
            end

            # URI lists have a dedicated parser and always come from the body
            return Text::Location.uri_list(transform_body(body)) if URI_LIST_TYPES.include?(media_type)

            # Any other media type is parsed as plain text, taken from headers or body
            lines = HEADERS_TEXT_TYPES.include?(media_type) ? transform_headers(headers) : transform_body(body)
            Text::Location.plain(lines)
          end

          # Transforms a `String`-like body into a series of independent lines.
          # Note: Multi-line elements are not supported by this parser.
          #
          # @param body [String] multi-line body
          # @return [Array] an array of lines
          def transform_body(body)
            return [] if body.blank?

            # Prefer `lines` when available, otherwise fall back to splitting on newlines
            raw_lines = if body.respond_to?(:lines)
                          body.lines
                        else
                          body.split("\n")
                        end

            # Surrounding whitespace (including line terminators) is dropped from every line
            raw_lines.map { |line| line.strip }
          end

          # Transforms a widely varied content of headers into a unified body-like
          # format for further processing. This method and its helpers should hide
          # differences between `text/occi` and `text/plain` completely.
          #
          # @param headers [Hash] hash with raw header key-value pairs
          # @return [Array] an array of body-like lines
          def transform_headers(headers)
            logger.debug "Transforming headers #{headers.inspect}" if logger_debug?

            # Three-stage pipeline: normalize keys, collapse them to canonical names, render lines
            normalized = normalize_headers(headers)
            canonical = canonize_headers(normalized)
            unify_headers(canonical)
          end

          # Normalizes raw headers into a more agreeable form. This process will remove known prefixes
          # as well as blank lines or non-OCCI keys.
          #
          # @param headers [Hash] hash with raw header key-value pairs
          # @return [Hash] a cleaner hash with relevant headers
          def normalize_headers(headers)
            return {} unless headers

            # Strip 'HTTP_' only when it really is a prefix. Removing it anywhere in the key
            # can rewrite an unrelated header name into one of the OCCI keys.
            prefix = /\A#{Regexp.escape(HEADER_HTTP_PREFIX)}/
            headers = headers.each_with_object({}) do |(key, value), renamed|
              renamed[key.sub(prefix, '').capitalize] = value
            end
            headers.delete_if { |_k, v| v.blank? }            # remove pairs with empty values
            headers.keep_if { |k, _v| OCCI_KEYS.include?(k) } # drop non-OCCI pairs

            logger.debug "Normalized headers #{headers.inspect}" if logger_debug?
            headers
          end

          # Canonizes normalized headers by eliminating naming differences in keys. Input for
          # this method must be already normalized by `normalize_headers`.
          #
          # @param headers [Hash] hash with normalized header key-value pairs
          # @return [Hash] canonized hash with predictable keys
          def canonize_headers(headers)
            validate_header_keys! headers.keys

            canonical = headers.each_with_object({}) do |(key, value), result|
              # The first member of the matching group serves as the canonical key
              group = key_name_groups.detect { |names| names.include?(key) }
              result[group.first] = [canonize_header_value(value)].flatten
            end

            logger.debug "Canonized headers #{canonical.inspect}" if logger_debug?
            canonical
          end

          # Attempts to canonize value from headers by looking for hidden multi-value lines usually
          # separated by commas.
          #
          # @param value [Object] value to canonize
          # @return [Object] original or canonized value
          def canonize_header_value(value)
            value = value.first if value.is_a?(Array) && value.count == 1 # ary with 1 could be hidden multi-val
            return value unless value.is_a?(String)

            # multi-vals can hide as ','-separated String; commas inside quoted text are data
            split_header_values(value).map(&:strip)
          end

          # Splits a raw header value on commas that are not enclosed in double quotes, so
          # that e.g. `occi.core.title="a, b", occi.core.id="1"` yields two elements instead
          # of three. Backslash-escaped characters inside quotes never end the quoted section.
          # As with `String#split`, trailing empty elements are dropped.
          #
          # @param value [String] raw, possibly multi-value header content
          # @return [Array<String>] individual (unstripped) values
          def split_header_values(value)
            parts = []
            start = 0
            quoted = false
            escaped = false

            value.each_char.with_index do |char, index|
              if escaped
                escaped = false
              elsif quoted && char == '\\'
                escaped = true
              elsif char == '"'
                quoted = !quoted
              elsif char == ',' && !quoted
                parts << value[start...index]
                start = index + 1
              end
            end
            parts << value[start..-1]

            parts.pop while parts.last && parts.last.empty?
            parts
          end
          private :split_header_values

          # Unifies canonized headers by transforming them into a body-like
          # form. Output of this method should be directly parsable as
          # `text/plain` found in body.
          #
          # @param headers [Hash] hash with canonized header key-value pairs
          # @return [Array] an array of body-like lines
          def unify_headers(headers)
            # One "Key: value" line per value; still grouped per key at this point
            lines = headers.each_pair.map do |key, values|
              values.map { |value| "#{key}: #{value}" }
            end

            logger.debug "Unified headers #{lines.inspect}" if logger_debug?
            lines.flatten.sort
          end

          # Checks for use of multiple keys from a single key name group. See `key_name_groups` for a list
          # of available groups. Mixed key notations will produce an error.
          #
          # @param headers_keys [Array] list of keys to check
          # @raise [Occi::Core::Errors::ParsingError] if mixed key notations detected
          def validate_header_keys!(headers_keys)
            # More than one key from the same group means notations were mixed
            mixed = key_name_groups.any? do |group|
              (headers_keys & group).length > 1
            end

            raise Occi::Core::Errors::ParsingError, "Headers #{headers_keys.inspect} contain mixed key notations" if mixed
          end

          # Returns a list of available key name groups accessible as constants by name on this class.
          #
          # @return [Array] list of available key name groups
          def key_name_groups
            # Group constants are resolved by name on the receiving class
            KEY_NAME_GROUPS.map do |group_name|
              const_get(group_name)
            end
          end
        end

        protected

        # :nodoc:
        def action_instance_category(entity_parser, action_instance)
          categories = entity_parser.plain_categories(action_instance)

          # The first category that is an action wins; having none is a parsing error
          found = categories.detect { |category| category.is_a?(Occi::Core::Action) }
          return found if found

          raise Occi::Core::Errors::ParsingError, 'ActionInstance does not specify action'
        end

        # :nodoc:
        def setify(object)
          # Enumerables are used as-is; a lone object is wrapped, and `nil` yields an empty set
          members = object.is_a?(Enumerable) ? object : [object].compact
          Set.new(members)
        end

        # :nodoc:
        def post_initialize(args)
          super

          # Instances of this parser only accept the OCCI text media types
          unless OCCI_TEXT_TYPES.include?(args[:media_type])
            raise Occi::Core::Errors::ParserError, "Media type #{args[:media_type].inspect} is not supported " \
                  "by instances of this parser, only #{OCCI_TEXT_TYPES.inspect}"
          end
        end
      end
    end
  end
end