fossasia/loklak_webclient

View on GitHub
iframely/lib/plugins/system/meta/HTMLMetaHandler.js

Summary

Maintainability
F
5 days
Test Coverage
var decodeHTML5 = require('entities').decodeHTML5;
var _ = require('underscore');
var url = require('url');

var utils = require('../../../utils');

var getCharset = utils.getCharset;
var encodeText = utils.encodeText;
var lowerCaseKeys = utils.lowerCaseKeys;

var LINK_REL_SKIP_VALUES = [
    'help',
    'license',
    'next',
    'prefetch',
    'prev',
    'search',
    'stylesheet'
];

var LINK_REL_ARRAY_VALUES = [
    'alternate'
];

function HTMLMetaHandler(uri, contentType, callback) {
    this._uri = uri;
    this._charset = getCharset(contentType);
    this._callback = callback;

    this._result = {};

    this._customProperties = {};
    this._currentCustomTag = null;

    this._end = false;
}

HTMLMetaHandler.prototype.onerror = function(err) {

    if (this._end) return;
    this._end = true;

    this._callback(err);
};

HTMLMetaHandler.prototype.onopentag = function(name, attributes) {

    if (this._end) return;

    name = name.toUpperCase();

    if (name === 'META') {

        var metaTag = attributes;

        if (('property' in metaTag) || ('name' in metaTag)) {

            var propertyParts = ('property' in metaTag) ? metaTag.property.split(':') : metaTag.name.split(':');

            var value = metaTag.content || metaTag.value || metaTag.src;

            if (typeof(value) === 'string') {
                // Remove new lines.
                value = value.replace(/(\r\n|\n|\r)/gm, ' ');
                // Trim.
                value = value.replace(/^\s+|\s+$/g, '');
            }

            if (/^\d+$/.test(value)) { // convert to integer
                value = parseInt(value, 10);
            } else if (/^\d+\.\d+$/.test(value)) {
                value = parseFloat(value);
            }

            if (typeof value !== 'undefined') {
                merge(this._result, propertyParts, value);
            }

        } else if (!this._charset && metaTag['http-equiv'] && metaTag['http-equiv'].toLowerCase() == 'content-type') {

            // Set encoding with <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>
            this._charset = getCharset(metaTag.content);

        } else if (!this._charset && metaTag['charset']) {

            // Set encoding with <meta charset="UTF-8" />.
            this._charset = getCharset(metaTag.charset, true);

        } else if (metaTag['http-equiv'] && metaTag['http-equiv'].toLowerCase() == 'x-frame-options') {

            this._customProperties["x-frame-options"] = metaTag.content;

        } else if (!this._refresh && metaTag['http-equiv'] && metaTag['http-equiv'].toLowerCase() == 'refresh') {

            // ex.: http://tv.sme.sk/v/29770/kalinak-o-ficovi-cudujem-sa-ze-vedie-vladu.html
            var refresh = metaTag.content && metaTag.content.match(/url=(?:'|")?([^'"]+)(?:'|")?/i);

            if (refresh) { 
                this._refresh = refresh[1];
            }

        } else if (metaTag.name == "description") {

            this._customProperties["html-description"] = metaTag.content;
        }

    } else if (name == 'TITLE') {

        this._currentCustomTag = {
            name: "html-title",
            value: ""
        };

    } else if (name == 'LINK') {

        var metaTag = attributes;
        var name = metaTag.name;
        var rel = metaTag.rel || name;
        var title = metaTag.title;
        var sizes = metaTag.sizes;
        var type = metaTag.type;
        var media = metaTag.media;
        var href;

        if (typeof(metaTag.href) == 'string') {
            href = metaTag.href.replace(/(\r\n|\n|\r)/gm,"");
            href = url.resolve(this._uri, href);
        }

        if (LINK_REL_SKIP_VALUES.indexOf(rel) == -1) {
            var existingProperty = this._customProperties[rel];

            if (existingProperty && !(existingProperty instanceof Array)) {
                existingProperty = this._customProperties[rel] = [existingProperty];
            }

            if (!existingProperty && LINK_REL_ARRAY_VALUES.indexOf(rel) > -1) {
                existingProperty = this._customProperties[rel] = [];
            }

            var property;
            if (type || sizes || media || title) {
                property = {
                    href: href
                };
                if (type) {
                    property.type = type;
                }
                if (sizes) {
                    property.sizes = sizes;
                }
                if (media) {
                    property.media = media;
                }
                if (title) {
                    property.title = title;
                }
            } else {
                property = href;
            }

            if (existingProperty) {
                existingProperty.push(property);
            } else {
                this._customProperties[rel] = property;
            }
        }
    }
};

HTMLMetaHandler.prototype.ontext = function(text) {
    if (this._currentCustomTag) {
        this._currentCustomTag.value += text;
    }
};

HTMLMetaHandler.prototype.onclosetag = function(name) {

    if (this._end) return;

    if (this._currentCustomTag) {
        this._customProperties[this._currentCustomTag.name] = this._currentCustomTag.value;
        this._currentCustomTag = null;
    }

    if (name.toUpperCase() === 'HEAD') {
        this._finalMerge();
    }
};

HTMLMetaHandler.prototype.onend = function() {
    if (this._end) return;
    this._finalMerge();
};

HTMLMetaHandler.prototype._finalMerge = function() {

    this._end = true;

    var that = this;

    for(var name in this._customProperties) {
        if (!(name in this._result)) {
            this._result[name] = this._customProperties[name];
        }
    }

    function encodeAllStrings(obj) {
        for (var k in obj) {
            if (typeof obj[k] == "object") {
                encodeAllStrings(obj[k]);
            } else {
                if (!obj.hasOwnProperty(k)) {
                    continue;       // skip this property
                }
                if (typeof(obj[k]) == 'string'){
                    // decode HTML entities after decoding the charset
                    // otherwise we would end up with a string with mixed encoding
                    obj[k] = decodeHTML5(encodeText(that._charset, obj[k]));
                }
            }
        }
    }

    //This is the "to-the-forehead" solution for those glitchy situations.
    function processArrays(obj){
        for (var k in obj) {

            if (!obj.hasOwnProperty(k)) {
                continue;

            } else if (obj[k] instanceof Array){

                // TODO: what is that???

                if ((obj[k].length === 2)
                    && (typeof(obj[k][0]) === 'object')
                    && ((typeof(obj[k][1]) !== 'undefined')
                        && (typeof(obj[k][1]) !== 'object'))){

                    obj[k][0][getDefaultKey(k)] = obj[k][1];

                    obj[k] = obj[k][0];
                }

            } else if (typeof obj[k] === "object"){
                processArrays(obj[k]);
            }
        }
    }

    /*
     ProcessSingleObjects works like:

     "og": {
         "image": [
             {
                "url": "https://thumbs.gfycat.com/PoliticalCalmGiantschnauzer-thumb100.jpg"
             },
             {
                "url": "https://thumbs.gfycat.com/PoliticalCalmGiantschnauzer-poster.jpg"
             }
         ]
     }

     to

     "og": {
         "image": [
            "https://thumbs.gfycat.com/PoliticalCalmGiantschnauzer-thumb100.jpg",
            "https://thumbs.gfycat.com/PoliticalCalmGiantschnauzer-poster.jpg"
         ]
     }
    */
    function getSingleValue(parentName, obj) {
        var key = getDefaultKey(parentName);
        if (key in obj) {
            if (_.keys(obj).length === 1) {
                return obj[key];
            }
        }
    }
    function processSingleObjects(obj) {

        for (var k in obj) {

            if (!obj.hasOwnProperty(k)) {
                continue;
            }

            var item = obj[k];

            if (item instanceof Array){

                for(var i = 0; i < item.length; i++) {
                    if (typeof item[i] === "object") {
                        var v = getSingleValue(k, item[i]);
                        if (v) {
                            item[i] = v;
                        }
                    }
                }

            } else if (typeof item === "object") {

                var v = getSingleValue(k, item);
                if (v) {

                    obj[k] = v;

                } else {
                    processSingleObjects(item);
                }
            }
        }
    }

    encodeAllStrings(this._result);
    processArrays(this._result);
    processSingleObjects(this._result);
    lowerCaseKeys(this._result);

    this._result['charset'] = this._charset || 'UTF-8';

    if (this._refresh) {this._result['refresh'] = this._refresh;}

    this._callback(null, this._result);
};

// TODO: add 'player': 1. Need some update in plugins.
var defaultMediaKeys = {'audio': 1, 'image': 1, 'video': 1, 'stream': 1};

function getDefaultKey(parent) {
    if (defaultMediaKeys[parent] && parent !== 'stream') {
        return 'url';
    } else {
        return 'value';
    }
}

function merge(parentObj, props, value) {

    /*
    Test urls:
     http://www.travelchannel.com/video/its-a-real-life-video-game
     http://cds.cern.ch/record/1628561
     http://www.dramafever.com/drama/4274/1/Heirs/?ap=1
     http://gfycat.com/PoliticalCalmGiantschnauzer
    */

    function _buildChildren(children, obj) {

        var current = obj;

        children.forEach(function(child) {

            var currentChild = current[child];

            if (currentChild instanceof Array) {

                // Get last child.
                var currentArray = currentChild;
                currentChild = currentChild[currentChild.length - 1];

                if (typeof(currentChild) !== 'object') {

                    var o = {};
                    o[getDefaultKey(child)] = currentChild;
                    currentChild = o;

                    currentArray[currentArray.length - 1] = currentChild;
                }

                current = currentChild;

            } else {

                if (typeof(currentChild) !== 'object') {

                    if (typeof(currentChild) === 'undefined') {

                        current[child] = {};

                    } else {

                        var o = {};
                        o[getDefaultKey(child)] = currentChild;
                        current[child] = o;
                    }
                }

                current = current[child];
            }
        });

        return current;
    }

    if (props.length === 0) {
        return;
    }

    var currentNodeName = props[props.length - 1];

    // Add 'url' to 'og:video'. And other same cases.
    if (props.length > 0 && defaultMediaKeys[currentNodeName]) {
        currentNodeName = getDefaultKey(currentNodeName);
        props = props.concat(currentNodeName);
    }

    // Create new node if property exist in last array object.
    /*

    Example:
        Have:
            [{
                url: 'xxx',
                type: 'yyy'
            }]
        Add:
            og.video.type = 'zzz'
        Result:
            [{
                url: 'xxx',
                type: 'yyy'
            }, {
                type: 'zzz'
            }]

    */
    if (props.length > 1) {
        var parentNodeName = props[props.length - 2];
        if (defaultMediaKeys[parentNodeName]) {
            // This will get last array item.
            var parentNode2 = _buildChildren(props.slice(0, -1), parentObj);
            // Check if property already exists. (e.g. og:video:type).
            /**/
            if (currentNodeName in parentNode2) {
                props = props.slice(0, -1);
                var _value = value;
                value = {};
                value[currentNodeName] = _value;
                currentNodeName = parentNodeName;
            }
        }
    }

    if (typeof(currentNodeName) === 'undefined'){
        return;
    }

    var parentNode = _buildChildren(props.slice(0, -1), parentObj);

    if (!(currentNodeName in parentNode)) {

        // New node.
        parentNode[currentNodeName] = value;

    } else if (_.isArray(parentNode[currentNodeName])) {

        var append = false;

        if (defaultMediaKeys[currentNodeName]) {
            var key = getDefaultKey(currentNodeName);
            var list = parentNode[currentNodeName];
            var currentChild = list[list.length - 1];
            if (typeof(currentChild) === 'object' && !currentChild[key]) {
                // Same as below in "***".
                currentChild[key] = value;
                append = true;
            }
        }

        if (!append) {
            // Push to existing array.
            parentNode[currentNodeName].push(value);
        }

    } else {

        var currentChild = parentNode[currentNodeName];
        var key = getDefaultKey(currentNodeName);

        if (typeof(currentChild) === 'object' && !currentChild[key]) {

            /*

            "***" Case description:
            Adding new url or value, but some child properties defined:

            have:   og.video = {width: 100);

            adding: og.video.url = "http://some.url";

            Result should be:

            og.video = {
                width: 100,
                url: "http://some.url"
            };

            If add again og.video = "http://some2.url"; then result should be:

            og.video = [{
                width: 100,
                url: "http://some.url"
            }, {
                url: "http://some2.url"
            }];

            */

            // Append to created object if value not present yet.
            currentChild[key] = value;

        } else {

            // Create array.
            if (parentNode[currentNodeName] != value){
                parentNode[currentNodeName] = [parentNode[currentNodeName], value];
            }
        }
    }
}

module.exports = HTMLMetaHandler;

module.exports.notPlugin = true;