ronin-rb/ronin-support

View on GitHub
lib/ronin/support/encoding/c.rb

Summary

Maintainability
A
1 hr
Test Coverage
# frozen_string_literal: true
#
# Copyright (c) 2006-2023 Hal Brodigan (postmodern.mod3 at gmail.com)
#
# ronin-support is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ronin-support is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with ronin-support.  If not, see <https://www.gnu.org/licenses/>.
#

require 'strscan'

module Ronin
  module Support
    class Encoding < ::Encoding
      #
      # Contains methods for encoding/decoding and escaping/unescaping C data.
      #
      # ## Core-Ext Methods
      #
      # * {Integer#c_escape}
      # * {Integer#c_encode}
      # * {String#c_escape}
      # * {String#c_unescape}
      # * {String#c_encode}
      # * {String#c_string}
      # * {String#c_unquote}
      #
      # @api public
      #
      module C
        #
        # Encodes a byte (or Unicode codepoint) as a C escaped String.
        #
        # Values `0x00..0xff` are formatted as `\xXX`, values `0x100..0xffff`
        # as `\uXXXX`, and values `0x10000` and above as `\UXXXXXXXX`.
        # Hexadecimal digits are always lowercase.
        #
        # @param [Integer] byte
        #   The byte value to encode.
        #
        # @return [String]
        #   The escaped C character.
        #
        # @raise [RangeError]
        #   The integer value is negative.
        #
        # @example
        #   Encoding::C.encode_byte(0x41)
        #   # => "\\x41"
        #   Encoding::C.encode_byte(0x100)
        #   # => "\\u0100"
        #   Encoding::C.encode_byte(0x10000)
        #   # => "\\U00010000"
        #
        def self.encode_byte(byte)
          if byte >= 0x00 && byte <= 0xff
            "\\x%.2x" % byte
          elsif byte >= 0x100 && byte <= 0xffff
            "\\u%.4x" % byte
          elsif byte >= 0x10000
            "\\U%.8x" % byte
          else
            # only negative values can reach this branch
            raise(RangeError,"#{byte.inspect} out of char range")
          end
        end

        # Special C bytes and their escaped Strings.
        ESCAPE_BYTES = {
          0x00 => '\0',
          0x07 => '\a',
          0x08 => '\b',
          0x09 => '\t',
          0x0a => '\n',
          0x0b => '\v',
          0x0c => '\f',
          0x0d => '\r',
          0x22 => '\"',
          0x1B => '\e',
          0x5c => '\\\\'
        }.freeze

        #
        # Escapes a byte as a C character.
        #
        # Bytes listed in {ESCAPE_BYTES} are returned as their back-slashed
        # form, other printable ASCII bytes (`0x20..0x7e`) are returned as-is,
        # and everything else is passed to {encode_byte}.
        #
        # @param [Integer] byte
        #   The byte value to escape.
        #
        # @return [String]
        #   The escaped C character.
        #
        # @raise [RangeError]
        #   The integer value is negative.
        #
        # @example
        #   Encoding::C.escape_byte(0x41)
        #   # => "A"
        #   Encoding::C.escape_byte(0x22)
        #   # => "\\\""
        #   Encoding::C.escape_byte(0x7f)
        #   # => "\\x7f"
        #
        # @example Escaping unicode characters:
        #   Encoding::C.escape_byte(0xffff)
        #   # => "\\uffff"
        #   Encoding::C.escape_byte(0x10000)
        #   # => "\\U00010000"
        #
        def self.escape_byte(byte)
          if byte >= 0x00 && byte <= 0xff
            ESCAPE_BYTES.fetch(byte) do
              if byte >= 0x20 && byte <= 0x7e
                byte.chr
              else
                encode_byte(byte)
              end
            end
          else
            encode_byte(byte)
          end
        end

        #
        # Encodes each character of the given data as C escaped characters.
        #
        # If the data has a valid encoding it is encoded codepoint by
        # codepoint, otherwise it is encoded byte by byte.
        #
        # @param [String] data
        #   The given data to encode.
        #
        # @return [String]
        #   The C encoded String.
        #
        # @example
        #   Encoding::C.encode("hello")
        #   # => "\\x68\\x65\\x6c\\x6c\\x6f"
        #
        def self.encode(data)
          encoded = String.new

          if data.valid_encoding?
            data.each_codepoint do |codepoint|
              encoded << encode_byte(codepoint)
            end
          else
            data.each_byte do |byte|
              encoded << encode_byte(byte)
            end
          end

          return encoded
        end

        #
        # Decodes the C encoded data.
        #
        # @param [String] data
        #   The given C data to decode.
        #
        # @return [String]
        #   The decoded data.
        #
        # @see unescape
        #
        def self.decode(data)
          unescape(data)
        end

        #
        # Escapes the given data as C characters.
        #
        # If the data has a valid encoding it is escaped codepoint by
        # codepoint, otherwise it is escaped byte by byte.
        #
        # @param [String] data
        #   The data to C escape.
        #
        # @return [String]
        #   The C escaped String.
        #
        # @example
        #   Encoding::C.escape("hello\nworld")
        #   # => "hello\\nworld"
        #
        def self.escape(data)
          escaped = String.new

          if data.valid_encoding?
            data.each_codepoint do |codepoint|
              escaped << escape_byte(codepoint)
            end
          else
            data.each_byte do |byte|
              escaped << escape_byte(byte)
            end
          end

          return escaped
        end

        # C characters that must be back-slashed.
        BACKSLASHED_CHARS = {
          '0'  => "\0",
          'a'  => "\a",
          'b'  => "\b",
          'e'  => "\e",
          't'  => "\t",
          'n'  => "\n",
          'v'  => "\v",
          'f'  => "\f",
          'r'  => "\r"
        }.freeze

        #
        # Unescapes the given C escaped data.
        #
        # Recognizes `\xXX`, `\uXXXX`, `\UXXXXXXXX`, octal (`\N`, `\NN`,
        # `\NNN`) and single character (`\n`, `\t`, ...) escapes. Any other
        # back-slashed character is replaced by the character itself, and a
        # lone back-slash at the very end of the data is kept as-is.
        #
        # @param [String] data
        #   The given C escaped data.
        #
        # @return [String]
        #   The unescaped C String.
        #
        # @example
        #   Encoding::C.unescape("\\x68\\x65\\x6c\\x6c\\x6f\\x20\\x77\\x6f\\x72\\x6c\\x64")
        #   # => "hello world"
        #
        # @example Unescaping unicode characters:
        #   Encoding::C.unescape("\\U0001f600")
        #   # => "😀"
        #
        def self.unescape(data)
          unescaped = String.new(encoding: Encoding::UTF_8)
          scanner   = StringScanner.new(data)

          until scanner.eos?
            unescaped << case (char = scanner.getch)
                         when "\\" # backslash
                           if (hex_char        = scanner.scan(/x[0-9a-fA-F]{1,2}/)) # \xXX
                             hex_char[1..].to_i(16).chr
                           elsif (hex_char     = scanner.scan(/u[0-9a-fA-F]{4,8}|U[0-9a-fA-F]{8}/)) # \u.. or \UXXXXXXXX
                             hex_char[1..].to_i(16).chr(Encoding::UTF_8)
                           elsif (octal_char   = scanner.scan(/[0-7]{1,3}/)) # \N, \NN, or \NNN
                             octal_char.to_i(8).chr
                           elsif (special_char = scanner.getch) # \[A-Za-z]
                             BACKSLASHED_CHARS.fetch(special_char,special_char)
                           else
                             # trailing back-slash with nothing after it;
                             # keep it instead of appending nil
                             char
                           end
                         else
                           char
                         end
          end

          return unescaped
        end

        #
        # Escapes and quotes the given data as a C string.
        #
        # @param [String] data
        #   The given data to escape and quote.
        #
        # @return [String]
        #   The quoted C string.
        #
        # @example
        #   Encoding::C.quote("hello\nworld\n")
        #   # => "\"hello\\nworld\\n\""
        #
        def self.quote(data)
          "\"#{escape(data)}\""
        end

        #
        # Unquotes and unescapes the given C string.
        #
        # @param [String] data
        #   The given C string.
        #
        # @return [String]
        #   The un-quoted String if the String begins and ends with quotes, or
        #   the same String if it is not quoted.
        #
        # @example
        #   Encoding::C.unquote("\"hello\\nworld\"")
        #   # => "hello\nworld"
        #
        def self.unquote(data)
          # a quoted String needs both an opening and a closing quote, so a
          # single quote character on its own is not considered quoted
          if data.length >= 2 &&
             ((data[0] == '"' && data[-1] == '"') ||
              (data[0] == "'" && data[-1] == "'"))
            unescape(data[1..-2])
          else
            data
          end
        end
      end
    end
  end
end

require 'ronin/support/encoding/c/core_ext'