nofxx/georuby

View on GitHub
lib/geo_ruby/ewk/ewkt_parser.rb

Summary

Maintainability
C
1 day
Test Coverage
require 'strscan'

module GeoRuby
  module SimpleFeatures
    # Error signalled whenever the EWKT input turns out to be malformed
    class EWKTFormatError < StandardError; end

    # Parses EWKT strings and notifies the factory passed to the constructor of parsing events (such as the beginning of a geometry definition, the value of the SRID...).
    #
    # =Example
    # factory = GeometryFactory::new
    # ewkt_parser = EWKTParser::new(factory)
    # ewkt_parser.parse(<EWKT String>)
    # geometry = factory.geometry
    #
    # You can also directly use the static method Geometry.from_ewkt
    class EWKTParser
      # Geometry keyword => name of the private method that parses its body.
      GEOMETRY_PARSERS = {
        'POINT' => :parse_point,
        'LINESTRING' => :parse_line_string,
        'POLYGON' => :parse_polygon,
        'MULTIPOINT' => :parse_multi_point,
        'MULTILINESTRING' => :parse_multi_line_string,
        'MULTIPOLYGON' => :parse_multi_polygon,
        'GEOMETRYCOLLECTION' => :parse_geometry_collection
      }.freeze

      # Message used whenever the input ends (or a point is not closed) too early.
      UNTERMINATED = 'EWKT string not correctly terminated'.freeze

      # factory:: receiver of the parsing events. This class calls +reset+,
      #           +begin_geometry(type, srid)+, +end_geometry(with_z, with_m)+ and the
      #           +add_point_x_y+, +add_point_x_y_z+, +add_point_x_y_m+ and
      #           +add_point_x_y_z_m+ methods on it.
      def initialize(factory)
        @factory = factory
      end

      # Parses the ewkt string passed as argument and notifies the factory of events.
      # Always returns nil; the result is whatever the factory built from the events.
      # Raises EWKTFormatError when the string is malformed.
      def parse(ewkt)
        @factory.reset
        @tokenizer_structure = TokenizerStructure.new(ewkt)
        # Reset all per-parse state up front so that a previous parse which raised
        # half-way cannot leak its SRID or dimension flags into this one.
        @srid = nil
        @with_z = false
        @with_m = false
        @is_3dm = false
        parse_geometry(true)
        nil
      end

      private

      # Parses one geometry, optionally preceded by "SRID=<n>;".
      # srid_allowed:: false for members of a GEOMETRYCOLLECTION, where an SRID prefix is an error.
      def parse_geometry(srid_allowed)
        token = @tokenizer_structure.get_next_token
        if token == 'SRID'
          raise EWKTFormatError, 'SRID not allowed at this position' unless srid_allowed
          raise EWKTFormatError, 'Invalid SRID expression' if @tokenizer_structure.get_next_token != '='

          @srid = @tokenizer_structure.get_next_token.to_i
          raise EWKTFormatError, 'Invalid SRID separator' if @tokenizer_structure.get_next_token != ';'

          geom_type = @tokenizer_structure.get_next_token
        else
          # Sub-geometries carry no SRID of their own: keep the parent's one,
          # or fall back to the default when this is the root geometry.
          @srid ||= DEFAULT_SRID
          geom_type = token
        end

        # Empty input, or input that stops right after the SRID prefix.
        raise EWKTFormatError, UNTERMINATED if geom_type.nil?

        # A trailing M (e.g. POINTM) announces x/y/m coordinates.
        if geom_type.end_with?('M')
          @is_3dm = true
          @with_m = true
          geom_type = geom_type.chop
        end

        handler = GEOMETRY_PARSERS[geom_type]
        raise EWKTFormatError, "Unrecognized geometry type: #{geom_type}" unless handler

        send(handler)
      end

      def parse_geometry_collection
        expect_opening_paren('Invalid GeometryCollection')
        parse_members(GeometryCollection) { parse_geometry(false) }
      end

      def parse_multi_polygon
        expect_opening_paren('Invalid MultiPolygon')
        parse_members(MultiPolygon) { parse_polygon }
      end

      def parse_multi_line_string
        expect_opening_paren('Invalid MultiLineString')
        parse_members(MultiLineString) { parse_line_string }
      end

      def parse_polygon
        expect_opening_paren('Invalid Polygon')
        parse_members(Polygon) { parse_linear_ring }
      end

      # must support the PostGIS form and the one in the specification
      def parse_multi_point
        expect_opening_paren('Invalid MultiPoint')

        if @tokenizer_structure.check_next_token == '('
          # specification form: MULTIPOINT((1 2),(3 4))
          parse_members(MultiPoint) { parse_point }
        else
          # PostGIS form: MULTIPOINT(1 2,3 4)
          parse_point_list(MultiPoint)
        end
      end

      def parse_linear_ring
        expect_opening_paren('Invalid Linear ring')
        parse_point_list(LinearRing)
      end

      def parse_line_string
        expect_opening_paren('Invalid Line string')
        parse_point_list(LineString)
      end

      # used to parse line_strings and linear_rings and the PostGIS form of multi_points
      def parse_point_list(geometry_type)
        @factory.begin_geometry(geometry_type, @srid)

        loop do
          @factory.begin_geometry(Point, @srid)
          terminator = parse_coords
          raise EWKTFormatError, UNTERMINATED if terminator.nil?

          @factory.end_geometry(@with_z, @with_m)
          break if terminator == ')'
        end

        @factory.end_geometry(@with_z, @with_m)
      end

      def parse_point
        expect_opening_paren('Invalid Point')

        @factory.begin_geometry(Point, @srid)
        raise EWKTFormatError, UNTERMINATED if parse_coords != ')'

        @factory.end_geometry(@with_z, @with_m)
      end

      # Reads one coordinate tuple, reports it to the factory and returns the token
      # that follows it (normally ',' or ')', nil when the input ended).
      def parse_coords
        x = @tokenizer_structure.get_next_token
        y = @tokenizer_structure.get_next_token
        raise EWKTFormatError, 'Bad Point format' if x.nil? || y.nil?

        return parse_measure(x, y) if @is_3dm

        z = next_token_or_fail
        if terminator?(z)
          # 2D : no z no m
          @factory.add_point_x_y(x.to_f, y.to_f)
          return z
        end

        m = next_token_or_fail
        @with_z = true
        if terminator?(m)
          # 3Dz : no m
          @factory.add_point_x_y_z(x.to_f, y.to_f, z.to_f)
          return m
        end

        # 4D
        @with_m = true
        @factory.add_point_x_y_z_m(x.to_f, y.to_f, z.to_f, m.to_f)
        @tokenizer_structure.get_next_token
      end

      # Third ordinate of an "...M" geometry: it is mandatory and is a measure, not a z.
      def parse_measure(x, y)
        m = @tokenizer_structure.get_next_token
        raise EWKTFormatError, 'No M dimension found' if m.nil? || terminator?(m)

        @factory.add_point_x_y_m(x.to_f, y.to_f, m.to_f)
        @tokenizer_structure.get_next_token
      end

      # Consumes the next token and raises with +message+ unless it is '('.
      def expect_opening_paren(message)
        return if @tokenizer_structure.get_next_token == '('

        raise EWKTFormatError, message
      end

      # Wraps a parenthesised list of members in begin/end events for +geometry_type+.
      # The block parses one member; the token following each member decides whether
      # the list goes on (anything but ')') or ends (')').
      def parse_members(geometry_type)
        @factory.begin_geometry(geometry_type, @srid)

        loop do
          yield
          separator = @tokenizer_structure.get_next_token
          raise EWKTFormatError, UNTERMINATED if separator.nil?
          break if separator == ')'
        end

        @factory.end_geometry(@with_z, @with_m)
      end

      # Next token, or an EWKTFormatError when the input is exhausted.
      def next_token_or_fail
        token = @tokenizer_structure.get_next_token
        raise EWKTFormatError, UNTERMINATED if token.nil?

        token
      end

      # True for the tokens that end a coordinate tuple.
      def terminator?(token)
        token == ',' || token == ')'
      end
    end

    # Splits an EWKT string into tokens: runs of word characters, dots and dashes
    # (keywords and numbers), or single punctuation characters. Whitespace of any
    # kind only separates tokens and is never returned.
    class TokenizerStructure
      # Keyword or number, with the whitespace around it.
      WORD_TOKEN = /\s*([\w.-]+)\s*/
      # Any other single non-blank character (parenthesis, comma, '=', ';'...).
      SYMBOL_TOKEN = /\s*(\S)\s*/

      # ewkt:: the String to tokenize
      def initialize(ewkt)
        @scanner = StringScanner.new(ewkt)
      end

      # Consumes and returns the next token, or nil when nothing but whitespace is left.
      def get_next_token
        @scanner[1] if @scanner.scan(WORD_TOKEN) || @scanner.scan(SYMBOL_TOKEN)
      end

      # Returns the token get_next_token would return next, without consuming it.
      def check_next_token
        # Matching through the scanner (rather than indexing the original string with
        # the scanner's byte position) stays correct for multibyte input and cannot
        # run past the end of the string on trailing blanks.
        @scanner[1] if @scanner.check(WORD_TOKEN) || @scanner.check(SYMBOL_TOKEN)
      end
    end
  end
end