CartoDB/cartodb20

View on GitHub
lib/carto/db/sanitize.rb

Summary

Maintainability
A
0 mins
Test Coverage
module Carto
  module DB
    module Sanitize
      PREFIX_REPLACEMENT = 'table_'.freeze
      SUFFIX_REPLACEMENT = '_t'.freeze
      CHARACTER_REPLACEMENT = '_'.freeze
      # See https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
      MAX_IDENTIFIER_LENGTH = 63
      DISALLOWED_STARTING_CHARACTERS_REGEX = /^[^a-z]+/
      DISALLOWED_CHARACTERS_REGEX = /[^a-z|_|0-9]/
      REPEATED_UNDERSCORES_REGEX = /_{2,}/
      # PG12_DEPRECATED raster, raster_overviews and raster_columns not supported in postgis 3+
      SYSTEM_TABLE_NAMES    = %w(spatial_ref_sys geography_columns geometry_columns raster_columns raster_overviews
                                 cdb_tablemetadata geometry raster).freeze
      RESERVED_TABLE_NAMES  = %w(layergroup all public).freeze

      # See https://www.postgresql.org/docs/current/ddl-system-columns.html
      SYSTEM_COLUMN_NAMES   = %w(tableoid xmin cmin xmax cmax ctid).freeze

      # See https://www.postgresql.org/docs/current/sql-keywords-appendix.html
      RESERVED_WORDS        = %w(all analyse analyze and any array as asc asymmetric authorization binary both
                                 case cast check collate collation column concurrently constraint create cross
                                 current_catalog current_date current_role current_schema current_time
                                 current_timestamp current_user default deferrable desc distinct do else end
                                 except false fetch for foreign freeze from full grant group having ilike in
                                 initially inner intersect into is isnull join lateral leading left like limit
                                 localtime localtimestamp natural not notnull null offset on only or order outer
                                 overlaps placing primary references returning right select session_user similar
                                 some symmetric table tablesample then to trailing true union unique user using
                                 variadic verbose when where window with).freeze

      ADDITIONAL_RESERVED_COLUMNS = %w(oid ogc_fid).freeze

      REJECTED_COLUMN_NAMES = (SYSTEM_COLUMN_NAMES + ADDITIONAL_RESERVED_COLUMNS).freeze

      # FIXME we have been reserving these name in columns but I don't know the reason ¯\_(ツ)_/¯
      ADDITIONAL_WORDS      = %w(between new off old format controller action).freeze

      RESERVED_COLUMN_NAMES = SYSTEM_COLUMN_NAMES + RESERVED_WORDS + ADDITIONAL_WORDS


      def self.append_with_truncate_and_sanitize(identifier, suffix)
        suffix_length = suffix.length

        unless suffix_length < MAX_IDENTIFIER_LENGTH
          raise "'#{suffix}' is too long (#{suffix_length} >= #{MAX_IDENTIFIER_LENGTH}) for append"
        end

        truncated_identifier = if identifier.length + suffix_length > MAX_IDENTIFIER_LENGTH
                                 identifier[0..(MAX_IDENTIFIER_LENGTH - suffix_length - 1)]
                               else
                                 identifier
                               end

        sanitize_identifier("#{truncated_identifier}#{suffix}")
      end

      def self.sanitize_identifier(identifier)
        # Make lowercase
        sanitized_identifier = identifier.downcase

        # Strip of starting or ending white spaces
        sanitized_identifier = sanitized_identifier.strip

        # Remove disallowed starting characters
        if sanitized_identifier =~ DISALLOWED_STARTING_CHARACTERS_REGEX
          sanitized_identifier = PREFIX_REPLACEMENT + sanitized_identifier
        end

        # Replace disallowed characters with '_'
        sanitized_identifier = sanitized_identifier.gsub(DISALLOWED_CHARACTERS_REGEX, CHARACTER_REPLACEMENT)

        # Remove repated '_'
        sanitized_identifier = sanitized_identifier.gsub(REPEATED_UNDERSCORES_REGEX, '_')

        # Make sure it's not too long
        sanitized_identifier = sanitized_identifier[0..(MAX_IDENTIFIER_LENGTH - 1)]

        # Append _t if is a reserved word or reserved table name
        # NOTE: strictly, we don't need to avoid ADDITIONAL_WORDS for table names
        if (RESERVED_WORDS + RESERVED_TABLE_NAMES + SYSTEM_TABLE_NAMES + ADDITIONAL_WORDS).map(&:downcase).include?(sanitized_identifier)
          sanitized_identifier += SUFFIX_REPLACEMENT
        end

        sanitized_identifier
      end
    end
  end
end