uccser/cs-field-guide

View on GitHub
csfieldguide/static/interactives/unicode-length/js/unicode-length.js

Summary

Maintainability
B
5 hrs
Test Coverage
// Wires up the "unicode length" interactive: when the button is clicked, the
// text in the input is measured and the UTF-8, UTF-16 and UTF-32 sizes (in
// bits) are written into their respective output elements.
$(document).ready(function () {
  /**
   * Counts the Unicode code points in a JavaScript string.
   *
   * JavaScript strings are indexed by UTF-16 code units, so a character
   * outside the Basic Multilingual Plane occupies two units (a surrogate
   * pair). Collapsing every valid pair into a single placeholder character
   * makes `.length` equal to the number of code points.
   *
   * @param {string} str - Text to measure.
   * @returns {number} Number of code points (unpaired surrogates count as one).
   */
  function countCodePoints(str) {
    return str.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, '_').length;
  }

  $("#interactive-unicode-length-button").on('click', function() {
    var text = $("#interactive-unicode-length-text").val();
    // Declared locally so the click handler does not leak implicit globals.
    var numUTF8BitsUsed = utf8BitsUsed(text);
    var numUTF16BitsUsed = utf16BitsUsed(text);
    // UTF-32 uses a fixed 32 bits per code point, NOT per UTF-16 code unit,
    // so text.length cannot be used directly here.
    var numUTF32BitsUsed = countCodePoints(text) * 32;
    $("#interactive-unicode-length-utf8").html(getUnicodeLengthText(numUTF8BitsUsed));
    $("#interactive-unicode-length-utf16").html(getUnicodeLengthText(numUTF16BitsUsed));
    $("#interactive-unicode-length-utf32").html(getUnicodeLengthText(numUTF32BitsUsed));
  });
});

/**
 * Builds the translated "<n> bits" label shown for an encoding.
 *
 * Relies on globally available `gettext` and `interpolate` functions
 * (presumably Django's JavaScript translation catalog - confirm against the
 * page template). `interpolate` is called with `true` as its third argument
 * so that the `%(num_bits_used)s` placeholder is filled by name.
 *
 * @param {number} numBitsUsed - Number of bits to display.
 * @returns {string} The label with the number substituted in.
 */
function getUnicodeLengthText(numBitsUsed) {
    // Local declarations: the previous bare assignments created implicit
    // globals and throw a ReferenceError in strict mode / ES modules.
    var format = gettext("%(num_bits_used)s bits");
    return interpolate(format, {"num_bits_used": numBitsUsed}, true);
}

/**
 * Returns the number of bits needed to store a string in UTF-16.
 *
 * A JavaScript string is already a sequence of 16-bit UTF-16 code units:
 * a code point in the Basic Multilingual Plane is one unit (16 bits) and a
 * code point above U+FFFF is a surrogate pair (two units, 32 bits). The size
 * is therefore 16 bits for every code unit in the string.
 *
 * This also covers cases the earlier branch-based version missed: code units
 * U+E000-U+FFFF and unpaired surrogates are each counted as one 16-bit unit,
 * and no character is skipped after an unpaired high surrogate.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Total number of bits used by the UTF-16 encoding.
 */
function utf16BitsUsed(str) {
    var BITS_PER_CODE_UNIT = 16;
    return str.length * BITS_PER_CODE_UNIT;
}

/**
 * Returns the number of bits needed to store a string in UTF-8.
 *
 * UTF-8 uses 1 byte for U+0000-U+007F, 2 bytes for U+0080-U+07FF, 3 bytes
 * for the rest of the Basic Multilingual Plane (up to and including U+FFFF)
 * and 4 bytes for code points above U+FFFF. The string is walked by UTF-16
 * code unit; a high surrogate immediately followed by a low surrogate is one
 * code point above U+FFFF.
 *
 * An unpaired surrogate cannot be encoded in UTF-8; it is counted as 3 bytes,
 * the size of the U+FFFD replacement character an encoder would emit, and the
 * following code unit is still counted on its own.
 *
 * @param {string} str - Text to measure.
 * @returns {number} Total number of bits used by the UTF-8 encoding.
 */
function utf8BitsUsed(str) {
    var total_bits = 0;
    for (var i = 0; i < str.length; i++) {
        var code = str.charCodeAt(i);
        if (code <= 0x7F) {
            total_bits += 8;
        } else if (code <= 0x7FF) {
            total_bits += 16;
        } else if (code >= 0xD800 && code <= 0xDBFF) {
            // High surrogate: only a pair if the next unit is a low surrogate.
            // charCodeAt past the end yields NaN, which fails both comparisons.
            var next = str.charCodeAt(i + 1);
            if (next >= 0xDC00 && next <= 0xDFFF) {
                total_bits += 32;
                i++; // The low surrogate belongs to this code point.
            } else {
                total_bits += 24;
            }
        } else {
            // Remaining BMP code units, including U+FFFF and lone low surrogates.
            total_bits += 24;
        }
    }
    return total_bits;
}