# opal/corelib/string/unpack.rb
# backtick_javascript: true

require 'base64'
require 'corelib/pack_unpack/format_string_parser'

class ::String
  %x{
    // Format Parser
    // Local shortcut for Opal.PackUnpack.eachDirectiveAndCount. unpack calls it
    // with the format string and a callback receiving (directive, count).
    var eachDirectiveAndCount = Opal.PackUnpack.eachDirectiveAndCount;

    function flattenArray(callback) {
      return function(data) {
        var array = callback(data);
        return #{`array`.flatten};
      }
    }

    // Decorator: turns every chunk (an array of bytes, least significant byte
    // first) produced by callback into one unsigned integer.
    //
    // The bytes are folded from the last element to the first with
    // reduceRight, so the chunk arrays are not mutated.
    //
    // NOTE: the word is accumulated as a plain JS number, so values that need
    // more than 53 bits (possible with 8-byte chunks) are not exact.
    function mapChunksToWords(callback) {
      return function(data) {
        var chunks = callback(data);

        return chunks.map(function(chunk) {
          return chunk.reduceRight(function(word, singleByte) {
            return word * 256 + singleByte;
          }, 0);
        });
      }
    }

    // Decorator: splits the array returned by callback into consecutive chunks
    // of exactly chunkSize elements. A trailing group with fewer than chunkSize
    // elements is dropped.
    //
    // The source array is only read (slice), never modified, so the buffer
    // handed in by the caller stays intact.
    function chunkBy(chunkSize, callback) {
      return function(data) {
        var array = callback(data),
            chunks = [],
            fullChunks = Math.floor(array.length / chunkSize);

        for (var i = 0; i < fullChunks; i++) {
          chunks.push(array.slice(i * chunkSize, (i + 1) * chunkSize));
        }

        return chunks;
      }
    }

    // Decorator: reinterprets the unsigned integers returned by callback as
    // two's complement values that are bytesCount bytes wide. Values at or
    // above half of the range are shifted down by the full range.
    function toNByteSigned(bytesCount, callback) {
      return function(data) {
        var values = callback(data),
            modulus = Math.pow(2, bytesCount * 8),
            signBoundary = modulus / 2;

        return values.map(function(value) {
          return value >= signBoundary ? value - modulus : value;
        });
      }
    }

    // Decorator: converts every number returned by callback into the
    // one-character string String.fromCharCode produces for it.
    function bytesToAsciiChars(callback) {
      return function(data) {
        return callback(data).map(function(code) {
          var character = String.fromCharCode(code);
          return character;
        });
      }
    }

    // Decorator: concatenates the elements returned by callback into a single
    // string without any separator.
    function joinChars(callback) {
      return function(data) {
        return callback(data).join('');
      }
    }

    // Decorator: puts whatever callback returns into a one-element array.
    function wrapIntoArray(callback) {
      return function(data) {
        return [callback(data)];
      }
    }

    // Builds a decorator factory that strips trailing elements from the array
    // of char codes returned by callback, as long as the last element is the
    // char code of one of the given one-character strings.
    //
    // The end of the kept range is located first and the array is copied at
    // most once; when nothing has to be stripped the array is returned as is.
    function filterTrailingChars(chars) {
      var charCodesToFilter = chars.map(function(s) { return s.charCodeAt(0); });

      return function(callback) {
        return function(data) {
          var charCodes = callback(data),
              end = charCodes.length;

          while (end > 0 && charCodesToFilter.indexOf(charCodes[end - 1]) !== -1) {
            end--;
          }

          return end === charCodes.length ? charCodes : charCodes.slice(0, end);
        }
      }
    }

    // Strips trailing NUL and space char codes.
    var filterTrailingZerosAndSpaces = filterTrailingChars(["\u0000", " "]);

    // Decorator: reverses the element order of every chunk returned by
    // callback. Each chunk is reversed in place and collected into a new outer
    // array.
    function invertChunks(callback) {
      return function(data) {
        var chunks = callback(data),
            inverted = [];

        for (var i = 0; i < chunks.length; i++) {
          inverted.push(chunks[i].reverse());
        }

        return inverted;
      }
    }

    // Decorator: uudecodes the bytes returned by callback and returns the
    // decoded bytes as an array.
    //
    // Every line starts with a length character, followed by groups of four
    // characters that each decode to three bytes. The sum of the declared line
    // lengths is used to cut the padding off the decoded output.
    //
    // When a declared line length is above 45, decoding is aborted and an empty
    // array is returned. The result is always an array so that the following
    // decorators, which call map on it, keep working.
    function uudecode(callback) {
      return function(data) {
        var bytes = callback(data),
            i = 0,
            length = 0,
            result = [];

        while (i < bytes.length) {
          // Declared number of decoded bytes on this line.
          var n = bytes[i] - 32 & 0x3F;

          ++i;

          // 10 = line feed: nothing follows the length character.
          if (bytes[i] === 10) {
            continue;
          }

          if (n > 45) {
            return [];
          }

          length += n;

          while (n > 0) {
            var c1 = bytes[i];
            var c2 = bytes[i + 1];
            var c3 = bytes[i + 2];
            var c4 = bytes[i + 3];

            // Four 6-bit values are regrouped into three bytes.
            var b1 = (c1 - 32 & 0x3F) << 2 | (c2 - 32 & 0x3F) >> 4;
            var b2 = (c2 - 32 & 0x3F) << 4 | (c3 - 32 & 0x3F) >> 2;
            var b3 = (c3 - 32 & 0x3F) << 6 | c4 - 32 & 0x3F;

            result.push(b1 & 0xFF);
            result.push(b2 & 0xFF);
            result.push(b3 & 0xFF);

            i += 4;
            n -= 3;
          }

          // Skip the character that terminates the line.
          ++i;
        }

        return result.slice(0, length);
      }
    }

    // Decorator: maps every number returned by callback to its base-2 string
    // representation (no zero padding).
    function toBits(callback) {
      return function(data) {
        return callback(data).map(function(value) {
          return value.toString(2);
        });
      }
    }

    // Decorator: decodes BER-compressed integers from the bytes returned by
    // callback. The low seven bits of each byte are appended to a bit string;
    // a byte whose highest bit is 0 ends the current integer, which is then
    // parsed from the collected bits. Bits of an unterminated trailing
    // sequence are discarded.
    function decodeBERCompressedIntegers(callback) {
      return function(data) {
        var bytes = callback(data),
            decoded = [],
            pending = '';

        bytes.forEach(function(singleByte) {
          var bits = singleByte.toString(2);

          // Left-pad to a full byte.
          while (bits.length < 8) {
            bits = '0' + bits;
          }

          pending += bits.slice(1);

          if (bits.charAt(0) === '0') {
            decoded.push(parseInt(pending, 2));
            pending = '';
          }
        });

        return decoded;
      }
    }

    function base64Decode(callback) {
      return function(data) {
        return #{Base64.decode64(`callback(data)`)};
      }
    }

    // quoted-printable decode
    //
    // Decorator: takes the string returned by callback and, in this order,
    // removes one tab or space at the end of each line, removes soft line
    // breaks (an equals sign followed by a line break or the end of the
    // string) and replaces every equals sign followed by two hex digits with
    // the character of that code.
    function qpdecode(callback) {
      return function(data) {
        var withoutTrailingBlank = callback(data).replace(/[\t\x20]$/gm, ''),
            withoutSoftBreaks = withoutTrailingBlank.replace(/=(?:\r\n?|\n|$)/g, '');

        return withoutSoftBreaks.replace(/=([a-fA-F0-9]{2})/g, function(match, hex) {
          return String.fromCharCode(parseInt(hex, 16));
        });
      }
    }

    // Returns its argument unchanged; used as the innermost step of the
    // decorator chains below.
    function identityFunction(value) { return value; }

    // Directive => function that turns the chunk read for that directive into
    // the values appended to the unpack result. A null entry means no handler
    // is defined; unpack raises for such a directive. The n, N, v and V
    // entries are filled in by the alias calls further down.
    var handlers = {
      // Integer
      'C': identityFunction,
      'S': mapChunksToWords(chunkBy(2, identityFunction)),
      'L': mapChunksToWords(chunkBy(4, identityFunction)),
      'Q': mapChunksToWords(chunkBy(8, identityFunction)),
      'J': null,

      'S>': mapChunksToWords(invertChunks(chunkBy(2, identityFunction))),
      'L>': mapChunksToWords(invertChunks(chunkBy(4, identityFunction))),
      'Q>': mapChunksToWords(invertChunks(chunkBy(8, identityFunction))),

      'c': toNByteSigned(1, identityFunction),
      's': toNByteSigned(2, mapChunksToWords(chunkBy(2, identityFunction))),
      'l': toNByteSigned(4, mapChunksToWords(chunkBy(4, identityFunction))),
      'q': toNByteSigned(8, mapChunksToWords(chunkBy(8, identityFunction))),
      'j': null,

      's>': toNByteSigned(2, mapChunksToWords(invertChunks(chunkBy(2, identityFunction)))),
      'l>': toNByteSigned(4, mapChunksToWords(invertChunks(chunkBy(4, identityFunction)))),
      'q>': toNByteSigned(8, mapChunksToWords(invertChunks(chunkBy(8, identityFunction)))),

      'n': null, // aliased later
      'N': null, // aliased later
      'v': null, // aliased later
      'V': null, // aliased later

      'U': identityFunction,
      'w': decodeBERCompressedIntegers(identityFunction),

      // Float
      'D': null,
      'd': null,
      'F': null,
      'f': null,
      'E': null,
      'e': null,
      'G': null,
      'g': null,

      // String
      'A': wrapIntoArray(joinChars(bytesToAsciiChars(filterTrailingZerosAndSpaces(identityFunction)))),
      'a': wrapIntoArray(joinChars(bytesToAsciiChars(identityFunction))),
      'Z': joinChars(bytesToAsciiChars(identityFunction)),
      'B': joinChars(identityFunction),
      'b': joinChars(identityFunction),
      'H': joinChars(identityFunction),
      'h': joinChars(identityFunction),
      'u': joinChars(bytesToAsciiChars(uudecode(identityFunction))),
      'M': qpdecode(joinChars(bytesToAsciiChars(identityFunction))),
      'm': base64Decode(joinChars(bytesToAsciiChars(identityFunction))),

      'P': null,
      'p': null
    };

    // Builds a reader that takes up to n elements off the front of the given
    // array. The reader returns an object whose chunk holds the taken elements
    // and whose rest holds the remainder.
    function readBytes(n) {
      return function(bytes) {
        return { chunk: bytes.slice(0, n), rest: bytes.slice(n) };
      }
    }

    function readUnicodeCharChunk(bytes) {
      var currentByteIndex = 0;
      var bytesLength = bytes.length;
      function readByte() {
        var result = bytes[currentByteIndex++];
        bytesLength = bytes.length - currentByteIndex;
        return result;
      }

      var c = readByte(), extraLength;

      if (c >> 7 == 0) {
        // 0xxx xxxx
        return { chunk: [c], rest: bytes.slice(currentByteIndex) };
      }

      if (c >> 6 == 0x02) {
        #{::Kernel.raise ::ArgumentError, 'malformed UTF-8 character'}
      }

      if (c >> 5 == 0x06) {
        // 110x xxxx (two bytes)
        extraLength = 1;
      } else if (c >> 4 == 0x0e) {
        // 1110 xxxx (three bytes)
        extraLength = 2;
      } else if (c >> 3 == 0x1e) {
        // 1111 0xxx (four bytes)
        extraLength = 3;
      } else if (c >> 2 == 0x3e) {
        // 1111 10xx (five bytes)
        extraLength = 4;
      } else if (c >> 1 == 0x7e) {
        // 1111 110x (six bytes)
        extraLength = 5;
      } else {
        #{::Kernel.raise 'malformed UTF-8 character'}
      }

      if (extraLength > bytesLength) {
        #{
          expected = `extraLength + 1`
          given = `bytesLength + 1`
          ::Kernel.raise ::ArgumentError, "malformed UTF-8 character (expected #{expected} bytes, given #{given} bytes)"
        }
      }

      // Remove the UTF-8 prefix from the char
      var mask = (1 << (8 - extraLength - 1)) - 1,
          result = c & mask;

      for (var i = 0; i < extraLength; i++) {
        c = readByte();

        if (c >> 6 != 0x02) {
          #{::Kernel.raise 'Invalid multibyte sequence'}
        }

        result = (result << 6) | (c & 0x3f);
      }

      if (result <= 0xffff) {
        return { chunk: [result], rest: bytes.slice(currentByteIndex) };
      } else {
        result -= 0x10000;
        var high = ((result >> 10) & 0x3ff) + 0xd800,
            low = (result & 0x3ff) + 0xdc00;
        return { chunk: [high, low], rest: bytes.slice(currentByteIndex) };
      }
    }

    // Splits buffer at the first space byte. The chunk is everything before
    // the space and rest starts with the space itself. Without a space the
    // whole buffer is the chunk and rest is empty.
    //
    // NOTE(review): because rest still begins with the space, calling this
    // reader again on rest returns an empty chunk and the same rest - confirm
    // that callers never repeat the read until the buffer is empty.
    function readUuencodingChunk(buffer) {
      var spaceAt = buffer.indexOf(32); // 32 = space

      return spaceAt === -1
        ? { chunk: buffer, rest: [] }
        : { chunk: buffer.slice(0, spaceAt), rest: buffer.slice(spaceAt) };
    }

    // Reads up to count bits from buffer, taking the bits of every byte from
    // the least significant one upwards. Returns an object whose chunk is a
    // one-element array holding the bits as a string of 0 and 1 characters and
    // whose rest holds the bytes that were not touched. A byte is consumed
    // entirely even when only some of its bits are needed.
    function readNBitsLSBFirst(buffer, count) {
      var collected = '';

      while (count > 0 && buffer.length > 0) {
        var current = buffer[0],
            wanted = Math.min(count, 8);

        buffer = buffer.slice(1);

        if (current == null) {
          continue;
        }

        var bits = current.toString(2);

        // Left-pad to eight digits, then flip so index 0 is the lowest bit.
        while (bits.length < 8) {
          bits = '0' + bits;
        }
        bits = bits.split('').reverse().join('');

        for (var taken = 0; taken < wanted; taken++) {
          collected += bits.charAt(taken) || '0';
        }

        count -= wanted;
      }

      return { chunk: [collected], rest: buffer };
    }

    // Reads up to count bits from buffer, taking the bits of every byte from
    // the most significant one downwards. Returns an object whose chunk is a
    // one-element array holding the bits as a string of 0 and 1 characters and
    // whose rest holds the bytes that were not touched. A byte is consumed
    // entirely even when only some of its bits are needed.
    function readNBitsMSBFirst(buffer, count) {
      var collected = '';

      while (count > 0 && buffer.length > 0) {
        var current = buffer[0],
            wanted = Math.min(count, 8);

        buffer = buffer.slice(1);

        if (current == null) {
          continue;
        }

        var bits = current.toString(2);

        // Left-pad to eight digits.
        while (bits.length < 8) {
          bits = '0' + bits;
        }

        for (var taken = 0; taken < wanted; taken++) {
          collected += bits.charAt(taken) || '0';
        }

        count -= wanted;
      }

      return { chunk: [collected], rest: buffer };
    }

    // Takes bytes off the front of buffer up to and including the first byte
    // whose highest bit (128) is not set. If no such byte exists the whole
    // buffer is taken. Returns an object with the taken bytes as chunk and the
    // remaining bytes as rest.
    function readWhileFirstBitIsOne(buffer) {
      var taken = 0;

      while (taken < buffer.length) {
        taken++;

        if ((buffer[taken - 1] & 128) === 0) {
          break;
        }
      }

      return { chunk: buffer.slice(0, taken), rest: buffer.slice(taken) };
    }

    // Reads bytes from the front of buffer until a zero byte, count bytes or
    // the end of the buffer is reached; the zero byte is not part of the
    // chunk.
    //
    // With an infinite count the consumed length is the chunk length; with a
    // finite count it is count, regardless of where the zero byte was found.
    // If the byte right after the consumed part is zero it is consumed too.
    function readTillNullCharacter(buffer, count) {
      var limit = Math.min(count, buffer.length),
          end = 0;

      while (end < limit && buffer[end] !== 0) {
        end++;
      }

      var chunk = buffer.slice(0, end),
          consumed = (count === Infinity) ? end : count;

      if (buffer[consumed] === 0) {
        consumed++;
      }

      return { chunk: chunk, rest: buffer.slice(consumed) };
    }

    // Reads up to count hex digits from buffer, emitting the high nibble of
    // every byte before its low nibble. When only one digit is still wanted,
    // just the high nibble is emitted but the byte is consumed anyway. Returns
    // an object with the digits (one-character strings) as chunk and the
    // unread bytes as rest.
    function readHexCharsHighNibbleFirst(buffer, count) {
      var digits = [];

      while (count > 0 && buffer.length > 0) {
        var hex = buffer[0].toString(16);

        if (hex.length < 2) {
          hex = '0' + hex;
        }

        buffer = buffer.slice(1);

        if (count === 1) {
          digits.push(hex.charAt(0));
          count -= 1;
        } else {
          digits.push(hex.charAt(0), hex.charAt(1));
          count -= 2;
        }
      }

      return { chunk: digits, rest: buffer };
    }

    // Reads up to count hex digits from buffer, emitting the low nibble of
    // every byte before its high nibble. When only one digit is still wanted,
    // just the low nibble is emitted but the byte is consumed anyway. Returns
    // an object with the digits (one-character strings) as chunk and the
    // unread bytes as rest.
    function readHexCharsLowNibbleFirst(buffer, count) {
      var digits = [];

      while (count > 0 && buffer.length > 0) {
        var hex = buffer[0].toString(16);

        if (hex.length < 2) {
          hex = '0' + hex;
        }

        buffer = buffer.slice(1);

        if (count === 1) {
          digits.push(hex.charAt(1));
          count -= 1;
        } else {
          digits.push(hex.charAt(1), hex.charAt(0));
          count -= 2;
        }
      }

      return { chunk: digits, rest: buffer };
    }

    // Turns a single-item reader into a reader that takes a count.
    //
    // The wrapped reader is called count times, or, when count is Infinity,
    // for as long as the buffer is not empty. Every call continues on the rest
    // of the previous one and the chunks are concatenated. Returns an object
    // with the merged chunk and the final rest.
    function readNTimesAndMerge(callback) {
      return function(buffer, count) {
        var merged = [],
            unlimited = (count === Infinity),
            reads = 0;

        while (unlimited ? buffer.length > 0 : reads < count) {
          var piece = callback(buffer);

          buffer = piece.rest;
          merged = merged.concat(piece.chunk);
          reads++;
        }

        return { chunk: merged, rest: buffer };
      }
    }

    // Reader that consumes the whole buffer; count is ignored.
    function readAll(buffer, count) {
      var everything = buffer;

      return { chunk: everything, rest: [] };
    }

    // Directive => reader, called by unpack as reader(buffer, count). A reader
    // returns an object whose chunk is the data consumed for the directive and
    // whose rest is the remaining buffer. A null entry means no reader is
    // defined; unpack raises for such a directive. The n, N, v and V entries
    // are filled in by the alias calls further down.
    var readChunk = {
      // Integer
      'C': readNTimesAndMerge(readBytes(1)),
      'S': readNTimesAndMerge(readBytes(2)),
      'L': readNTimesAndMerge(readBytes(4)),
      'Q': readNTimesAndMerge(readBytes(8)),
      'J': null,

      'S>': readNTimesAndMerge(readBytes(2)),
      'L>': readNTimesAndMerge(readBytes(4)),
      'Q>': readNTimesAndMerge(readBytes(8)),

      'c': readNTimesAndMerge(readBytes(1)),
      's': readNTimesAndMerge(readBytes(2)),
      'l': readNTimesAndMerge(readBytes(4)),
      'q': readNTimesAndMerge(readBytes(8)),
      'j': null,

      's>': readNTimesAndMerge(readBytes(2)),
      'l>': readNTimesAndMerge(readBytes(4)),
      'q>': readNTimesAndMerge(readBytes(8)),

      'n': null, // aliased later
      'N': null, // aliased later
      'v': null, // aliased later
      'V': null, // aliased later

      'U': readNTimesAndMerge(readUnicodeCharChunk),
      'w': readNTimesAndMerge(readWhileFirstBitIsOne),

      // Float
      'D': null,
      'd': null,
      'F': null,
      'f': null,
      'E': null,
      'e': null,
      'G': null,
      'g': null,

      // String
      'A': readNTimesAndMerge(readBytes(1)),
      'a': readNTimesAndMerge(readBytes(1)),
      'Z': readTillNullCharacter,
      'B': readNBitsMSBFirst,
      'b': readNBitsLSBFirst,
      'H': readHexCharsHighNibbleFirst,
      'h': readHexCharsLowNibbleFirst,
      'u': readNTimesAndMerge(readUuencodingChunk),
      'M': readAll,
      'm': readAll,

      'P': null,
      'p': null
    }

    // Directive => whether a result that is shorter than an explicit count is
    // padded with nil up to that count (true) or left as it is (false). The
    // table is only consulted for finite counts; a null entry makes unpack
    // raise in that case. The n, N, v and V entries are filled in by the alias
    // calls further down.
    var autocompletion = {
      // Integer
      'C': true,
      'S': true,
      'L': true,
      'Q': true,
      'J': null,

      'S>': true,
      'L>': true,
      'Q>': true,

      'c': true,
      's': true,
      'l': true,
      'q': true,
      'j': null,

      's>': true,
      'l>': true,
      'q>': true,

      'n': null, // aliased later
      'N': null, // aliased later
      'v': null, // aliased later
      'V': null, // aliased later

      'U': false,
      'w': false,

      // Float
      'D': null,
      'd': null,
      'F': null,
      'f': null,
      'E': null,
      'e': null,
      'G': null,
      'g': null,

      // String
      'A': false,
      'a': false,
      'Z': false,
      'B': false,
      'b': false,
      'H': false,
      'h': false,
      'u': false,
      'M': false,
      'm': false,

      'P': null,
      'p': null
    }

    // Whole-format shortcuts: when the format string is exactly one of these
    // keys, unpack applies the handler to the entire buffer and returns its
    // result directly instead of walking the format with the parser.
    var optimized = {
      'C*': handlers['C'],
      'c*': handlers['c'],
      'A*': handlers['A'],
      'a*': handlers['a'],
      'M*': wrapIntoArray(handlers['M']),
      'm*': wrapIntoArray(handlers['m']),
      'S*': handlers['S'],
      's*': handlers['s'],
      'L*': handlers['L'],
      'l*': handlers['l'],
      'Q*': handlers['Q'],
      'q*': handlers['q'],
      'S>*': handlers['S>'],
      's>*': handlers['s>'],
      'L>*': handlers['L>'],
      'l>*': handlers['l>'],
      'Q>*': handlers['Q>'],
      'q>*': handlers['q>']
    }

    // Makes newDirective behave like existingDirective by copying its reader,
    // handler and autocompletion rule.
    function alias(existingDirective, newDirective) {
      readChunk[newDirective] = readChunk[existingDirective];
      handlers[newDirective] = handlers[existingDirective];
      autocompletion[newDirective] = autocompletion[existingDirective];
    }

    // n and N share the implementation of the S> and L> directives.
    alias('S>', 'n');
    alias('L>', 'N');

    // v and V share the implementation of the S and L directives.
    alias('S', 'v');
    alias('L', 'V');
  }

  # Decodes the bytes of the string according to +format+ and returns the
  # decoded values as an array.
  #
  # Whitespace and NUL characters are removed from +format+ before it is
  # parsed. +offset+ is the number of leading bytes to skip; it is interpreted
  # as a byte offset on every code path.
  #
  # Raises ArgumentError when +offset+ is negative or larger than the number
  # of bytes. Raises a RuntimeError for a directive that has no chunk reader,
  # no handler or (with an explicit count) no autocompletion rule.
  def unpack(format, offset: 0)
    ::Kernel.raise ::ArgumentError, "offset can't be negative" if offset < 0
    format = ::Opal.coerce_to!(format, ::String, :to_str).gsub(/\s/, '').delete("\000")

    %x{
      var output = [];

      // A very optimized handler for U*.
      if (format == "U*" &&
          self.internal_encoding.name === "UTF-8" &&
          typeof self.codePointAt === "function") {

        var cp, j = 0, start = 0, skippedBytes = 0;

        // offset counts bytes, the loop below indexes the string: translate
        // the offset by walking whole code points and adding up the number of
        // bytes each of them takes in UTF-8.
        while (skippedBytes < offset && start < self.length) {
          cp = self.codePointAt(start);
          skippedBytes += cp < 0x80 ? 1 : cp < 0x800 ? 2 : cp < 0x10000 ? 3 : 4;
          start += cp > 0xffff ? 2 : 1;
        }

        // Only take the shortcut when the offset falls exactly on a character
        // boundary. Otherwise (offset inside a character or past the end) fall
        // through to the generic path, which validates the offset and the
        // bytes it points at.
        if (skippedBytes === offset) {
          output = new Array(self.length);
          for (var i = start; i < self.length; i++) {
            cp = output[j++] = self.codePointAt(i);
            // Code points above 0xffff occupy two string positions.
            if (cp > 0xffff) i++;
          }
          return output.slice(0, j);
        }
      }

      var buffer = self.$bytes();

      #{::Kernel.raise ::ArgumentError, 'offset outside of string' if offset > `buffer`.length}

      buffer = buffer.slice(offset);


      // optimization
      var optimizedHandler = optimized[format];
      if (optimizedHandler) {
        return optimizedHandler(buffer);
      }

      // Pads array with nil until it holds size elements.
      function autocomplete(array, size) {
        while (array.length < size) {
          array.push(nil);
        }

        return array;
      }

      // Reads the data for one directive off the front of buffer and converts
      // it with the handler of that directive.
      function processChunk(directive, count) {
        var chunk,
            chunkReader = readChunk[directive];

        if (chunkReader == null) {
          #{::Kernel.raise "Unsupported unpack directive #{`directive`.inspect} (no chunk reader defined)"}
        }

        var chunkData = chunkReader(buffer, count);
        chunk = chunkData.chunk;
        buffer = chunkData.rest;

        var handler = handlers[directive];

        if (handler == null) {
          #{::Kernel.raise "Unsupported unpack directive #{`directive`.inspect} (no handler defined)"}
        }

        return handler(chunk);
      }

      eachDirectiveAndCount(format, function(directive, count) {
        var part = processChunk(directive, count);

        // Padding only applies when an explicit, finite count was given.
        if (count !== Infinity) {
          var shouldAutocomplete = autocompletion[directive];

          if (shouldAutocomplete == null) {
            #{::Kernel.raise "Unsupported unpack directive #{`directive`.inspect} (no autocompletion rule defined)"}
          }

          if (shouldAutocomplete) {
            autocomplete(part, count);
          }
        }

        output = output.concat(part);
      });

      return output;
    }
  end

  # Decodes the string according to the first directive of +format+ and
  # returns the first decoded value.
  #
  # Whitespace and NUL characters are removed from +format+ first. The first
  # directive is passed to #unpack together with its modifiers (_ ! < >) and
  # its count (* or digits), so that e.g. "S>" keeps its byte order and "H*"
  # or "a4" keep their length. Directives after the first one are ignored.
  #
  # +offset+ is forwarded to #unpack unchanged.
  def unpack1(format, offset: 0)
    format = ::Opal.coerce_to!(format, ::String, :to_str).gsub(/\s/, '').delete("\000")

    # First character plus any modifiers and an optional count.
    first_directive = format[/\A.[_!<>]*(?:\*|\d+)?/]

    unpack(first_directive, offset: offset)[0]
  end
end