src/language/HTMLUtils.js from adobe/brackets

src/language/HTMLUtils.js
Summary

Maintainability

3 days
Test Coverage

Issues
/*
 * Copyright (c) 2012 - present Adobe Systems Incorporated. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

define(function (require, exports, module) {
    "use strict";

    var CodeMirror = require("thirdparty/CodeMirror/lib/codemirror"),
        TokenUtils = require("utils/TokenUtils");

    // Constants
    var TAG_NAME = "tagName",
        CLOSING_TAG = "closingTag",
        ATTR_NAME = "attr.name",
        ATTR_VALUE = "attr.value";

    // Regular expression for token types with "tag" prefixed
    var tagPrefixedRegExp = /^tag/;

   /**
     * @private
     * Sometimes as attr values are getting typed, if the quotes aren't balanced yet
     * some extra 'non attribute value' text gets included in the token. This attempts
     * to assure the attribute value we grab is always good
     * @param {editor:{CodeMirror}, pos:{ch:{string}, line:{number}}, token:{object}} context
     * @return { val:{string}, offset:{number}}
     */
    function _extractAttrVal(ctx) {
        var attrValue = ctx.token.string,
            startChar = attrValue.charAt(0),
            endChar = attrValue.charAt(attrValue.length - 1),
            offset = TokenUtils.offsetInToken(ctx),
            foundEqualSign = false;

        //If this is a fully quoted value, return the whole
        //thing regardless of position
        if (attrValue.length > 1 &&
                (startChar === "'" || startChar === '"') &&
                endChar === startChar) {

            // Find an equal sign before the end quote. If found,
            // then the user may be entering an attribute value right before
            // another attribute and we're getting a false balanced string.
            // An example of this case is <link rel" href="foo"> where the
            // cursor is right after the first double quote.
            foundEqualSign = (attrValue.match(/\=\s*['"]$/) !== null);

            if (!foundEqualSign) {
                //strip the quotes and return;
                attrValue = attrValue.substring(1, attrValue.length - 1);
                offset = offset - 1 > attrValue.length ? attrValue.length : offset - 1;
                return {val: attrValue, offset: offset, quoteChar: startChar, hasEndQuote: true};
            }
        }

        if (foundEqualSign) {
            var spaceIndex = attrValue.indexOf(" "),
                bracketIndex = attrValue.indexOf(">"),
                upToIndex = (spaceIndex !== -1 && spaceIndex < bracketIndex) ? spaceIndex : bracketIndex;
            attrValue = attrValue.substring(0, (upToIndex > offset) ? upToIndex : offset);
        } else if (offset > 0 && (startChar === "'" || startChar === '"')) {
            //The att value is getting edit in progress. There is possible extra
            //stuff in this token state since the quote isn't closed, so we assume
            //the stuff from the quote to the current pos is definitely in the attribute
            //value.
            attrValue = attrValue.substring(0, offset);
        }

        //If the attrValue start with a quote, trim that now
        startChar = attrValue.charAt(0);
        if (startChar === "'" || startChar === '"') {
            attrValue = attrValue.substring(1);
            offset--;
        } else {
            startChar = "";
            // Make attr value empty and set offset to zero if it has the ">"
            // which is the closing of the tag.
            if (endChar === ">") {
                attrValue = "";
                offset = 0;
            }
        }

        return {val: attrValue, offset: offset, quoteChar: startChar, hasEndQuote: false};
    }

    /**
     * @private
     * Gets the tagname from where ever you are in the currect state
     * @param {editor:{CodeMirror}, pos:{ch:{string}, line:{number}}, token:{object}} context
     * @return {string}
     */
    function _extractTagName(ctx) {
        var mode = ctx.editor.getMode(),
            innerModeData = CodeMirror.innerMode(mode, ctx.token.state);

        if (ctx.token.type === "tag bracket") {
            return innerModeData.state.tagName;
        }

        // If the ctx is inside the tag name of an end tag, innerModeData.state.tagName is
        // undefined. So return token string as the tag name.
        return innerModeData.state.tagName || ctx.token.string;
    }

    /**
     * Compiles a list of used attributes for a given tag
     * @param {CodeMirror} editor An instance of a CodeMirror editor
     * @param {ch:{string}, line:{number}} pos A CodeMirror position
     * @return {Array.<string>} A list of the used attributes inside the current tag
     */
    function getTagAttributes(editor, pos) {
        var attrs       = [],
            backwardCtx = TokenUtils.getInitialContext(editor._codeMirror, pos),
            forwardCtx  = $.extend({}, backwardCtx);

        if (editor.getModeForSelection() === "html") {
            if (backwardCtx.token && !tagPrefixedRegExp.test(backwardCtx.token.type)) {
                while (TokenUtils.movePrevToken(backwardCtx) && !tagPrefixedRegExp.test(backwardCtx.token.type)) {
                    if (backwardCtx.token.type === "error" && backwardCtx.token.string.indexOf("<") === 0) {
                        break;
                    }
                    if (backwardCtx.token.type === "attribute") {
                        attrs.push(backwardCtx.token.string);
                    }
                }

                while (TokenUtils.moveNextToken(forwardCtx) && !tagPrefixedRegExp.test(forwardCtx.token.type)) {
                    if (forwardCtx.token.type === "attribute") {
                        // If the current tag is not closed, codemirror may return the next opening
                        // tag as an attribute. Stop the search loop in that case.
                        if (forwardCtx.token.string.indexOf("<") === 0) {
                            break;
                        }
                        attrs.push(forwardCtx.token.string);
                    } else if (forwardCtx.token.type === "error") {
                        if (forwardCtx.token.string.indexOf("<") === 0 || forwardCtx.token.string.indexOf(">") === 0) {
                            break;
                        }
                        // If we type the first letter of the next attribute, it comes as an error
                        // token. We need to double check for possible invalidated attributes.
                        if (/\S/.test(forwardCtx.token.string) &&
                                forwardCtx.token.string.indexOf("\"") === -1 &&
                                forwardCtx.token.string.indexOf("'") === -1 &&
                                forwardCtx.token.string.indexOf("=") === -1) {
                            attrs.push(forwardCtx.token.string);
                        }
                    }
                }
            }
        }

        return attrs;
    }

    /**
     * Creates a tagInfo object and assures all the values are entered or are empty strings
     * @param {string=} tokenType what is getting edited and should be hinted
     * @param {number=} offset where the cursor is for the part getting hinted
     * @param {string=} tagName The name of the tag
     * @param {string=} attrName The name of the attribute
     * @param {string=} attrValue The value of the attribute
     * @return {{tagName:string,
     *           attr:{name:string, value:string, valueAssigned:boolean, quoteChar:string, hasEndQuote:boolean},
     *           position:{tokenType:string, offset:number}
     *         }}
     *         A tagInfo object with some context about the current tag hint.
     */
    function createTagInfo(tokenType, offset, tagName, attrName, attrValue, valueAssigned, quoteChar, hasEndQuote) {
        return { tagName: tagName || "",
                 attr:
                    { name: attrName || "",
                      value: attrValue || "",
                      valueAssigned: valueAssigned || false,
                      quoteChar: quoteChar || "",
                      hasEndQuote: hasEndQuote || false },
                 position:
                    { tokenType: tokenType || "",
                      offset: offset || 0 } };
    }

    /**
     * @private
     * Gets the taginfo starting from the attribute value and moving backwards
     * @param {editor:{CodeMirror}, pos:{ch:{string}, line:{number}}, token:{object}} context
     * @return {string}
     */
    function _getTagInfoStartingFromAttrValue(ctx) {
        // Assume we in the attr value
        // and validate that by going backwards
        var attrInfo = _extractAttrVal(ctx),
            attrVal = attrInfo.val,
            offset = attrInfo.offset,
            quoteChar = attrInfo.quoteChar,
            hasEndQuote = attrInfo.hasEndQuote,
            strLength = ctx.token.string.length;

        if ((ctx.token.type === "string" || ctx.token.type === "error") &&
                ctx.pos.ch === ctx.token.end && strLength > 1) {
            var firstChar = ctx.token.string[0],
                lastChar = ctx.token.string[strLength - 1];

            // We get here only when the cursor is immediately on the right of the end quote
            // of an attribute value. So we want to return an empty tag info so that the caller
            // can dismiss the code hint popup if it is still open.
            if (firstChar === lastChar && (firstChar === "'" || firstChar === "\"")) {
                return createTagInfo();
            }
        }
        
        //Skip all the 'string' tokens backwards. Required to reach to the first line 
        //of multiline HTML attribute value.
        while (TokenUtils.moveSkippingWhitespace(TokenUtils.movePrevToken, ctx)) {
            if (ctx.token.type !== "string") {
                break;
            }
        }

        //As we have skipped all the string tokens, make a forward navigation to move to the
        //first 'string token so that in next backward navigation we can find '='.
        TokenUtils.moveSkippingWhitespace(TokenUtils.moveNextToken, ctx);

        //Move to the prev token, and check if it's "="
        if (!TokenUtils.moveSkippingWhitespace(TokenUtils.movePrevToken, ctx) || ctx.token.string !== "=") {
            return createTagInfo();
        }

        //Move to the prev token, and check if it's an attribute
        if (!TokenUtils.moveSkippingWhitespace(TokenUtils.movePrevToken, ctx) || ctx.token.type !== "attribute") {
            return createTagInfo();
        }

        var attrName = ctx.token.string;
        var tagName = _extractTagName(ctx);

        //We're good.
        return createTagInfo(ATTR_VALUE, offset, tagName, attrName, attrVal, true, quoteChar, hasEndQuote);
    }

    /**
     * @private
     * Gets the taginfo starting from the attribute name and moving forwards
     * @param {editor:{CodeMirror}, pos:{ch:{string}, line:{number}}, token:{object}} context
     * @param {boolean} isPriorAttr indicates whether we're getting info for a prior attribute
     * @return {string}
     */
    function _getTagInfoStartingFromAttrName(ctx, isPriorAttr) {
        //Verify We're in the attribute name, move forward and try to extract the rest of
        //the info. If the user it typing the attr the rest might not be here
        if (isPriorAttr === false && ctx.token.type !== "attribute") {
            return createTagInfo();
        }

        var tagName = _extractTagName(ctx);
        var attrName = ctx.token.string;
        var offset = TokenUtils.offsetInToken(ctx);

        if (!TokenUtils.moveSkippingWhitespace(TokenUtils.moveNextToken, ctx) || ctx.token.string !== "=") {
            // If we're checking for a prior attribute and the next token we get is a tag or an html comment or
            // an undefined token class, then we've already scanned past our original cursor location.
            // So just return an empty tag info.
            if (isPriorAttr &&
                    (!ctx.token.type ||
                    (ctx.token.type && ctx.token.type !== "attribute" &&
                        ctx.token.type.indexOf("error") === -1 &&
                        ctx.token.string.indexOf("<") !== -1))) {
                return createTagInfo();
            }
            return createTagInfo(ATTR_NAME, offset, tagName, attrName);
        }

        if (!TokenUtils.moveSkippingWhitespace(TokenUtils.moveNextToken, ctx)) {
            return createTagInfo(ATTR_NAME, offset, tagName, attrName);
        }
        //this should be the attrvalue
        var attrInfo = _extractAttrVal(ctx),
            attrVal = attrInfo.val,
            quoteChar = attrInfo.quoteChar,
            hasEndQuote = attrInfo.hasEndQuote;

        return createTagInfo(ATTR_NAME, offset, tagName, attrName, attrVal, true, quoteChar, hasEndQuote);
    }

    /**
     * Figure out if we're in a tag, and if we are return info about it
     * An example token stream for this tag is <span id="open-files-disclosure-arrow"></span> :
     *      className:tag       string:"<span"
     *      className:          string:" "
     *      className:attribute string:"id"
     *      className:          string:"="
     *      className:string    string:""open-files-disclosure-arrow""
     *      className:tag       string:"></span>"
     * @param {Editor} editor An instance of a Brackets editor
     * @param {{ch: number, line: number}} constPos  A CM pos (likely from editor.getCursorPos())
     * @param {isHtmlMode:boolean} let the module know we are in html mode
     * @return {{tagName:string,
     *           attr:{name:string, value:string, valueAssigned:boolean, quoteChar:string, hasEndQuote:boolean},
     *           position:{tokenType:string, offset:number}
     *         }}
     *         A tagInfo object with some context about the current tag hint.
     */
    function getTagInfo(editor, constPos, isHtmlMode) {
        // We're going to be changing pos a lot, but we don't want to mess up
        // the pos the caller passed in so we use extend to make a safe copy of it.
        var pos = $.extend({}, constPos),
            ctx = TokenUtils.getInitialContext(editor._codeMirror, pos),
            offset = TokenUtils.offsetInToken(ctx),
            tagInfo,
            tokenType;

        // Check if this is not known to be in html mode and inside a style block.
        if (!isHtmlMode && editor.getModeForSelection() !== "html") {
            return createTagInfo();
        }

        // Check and see where we are in the tag
        if (ctx.token.string.length > 0 && !/\S/.test(ctx.token.string)) {

            // token at (i.e. before) pos is whitespace, so test token at next pos
            //
            // note: getTokenAt() does range checking for ch. If it detects that ch is past
            // EOL, it uses EOL, same token is returned, and the following condition fails,
            // so we don't need to worry about testPos being valid.
            var testPos = {ch: ctx.pos.ch + 1, line: ctx.pos.line},
                testToken = editor._codeMirror.getTokenAt(testPos, true);

            if (testToken.string.length > 0 && /\S/.test(testToken.string) &&
                    testToken.string.charAt(0) !== ">") {
                // pos has whitespace before it and non-whitespace after it, so use token after
                ctx.token = testToken;

                // Check whether the token type is one of the types prefixed with "tag"
                // (e.g. "tag", "tag error", "tag brackets")
                if (tagPrefixedRegExp.test(ctx.token.type)) {
                    // Check to see if the cursor is just before a "<" but not in any tag.
                    if (ctx.token.string.charAt(0) === "<") {
                        return createTagInfo();
                    }
                } else if (ctx.token.type === "attribute") {
                    // Check to see if the user is going to add a new attr before an existing one
                    return _getTagInfoStartingFromAttrName(ctx, false);
                } else if (ctx.token.string === "=") {
                    // We're between a whitespace and  "=", so return an empty tag info.
                    return createTagInfo();
                }
            } else {
                // We get here if ">" or white spaces after testPos.
                // Check if there is an equal sign after testPos by creating a new ctx
                // with the original pos. We can't use the current ctx since we need to
                // use it to scan backwards if we don't find an equal sign here.
                // Comment out this block to fix issue #1510.
//                if (testToken.string.length > 0 && testToken.string.charAt(0) !== ">") {
//                    tempCtx = TokenUtils.getInitialContext(editor._codeMirror, pos);
//                    if (TokenUtils.moveSkippingWhitespace(TokenUtils.moveNextToken, tempCtx) && tempCtx.token.string === "=") {
//                        // Return an empty tag info since we're between an atribute name and the equal sign.
//                        return createTagInfo();
//                    }
//                }

                // next, see what's before pos
                if (!TokenUtils.movePrevToken(ctx)) {
                    return createTagInfo();
                }

                if (ctx.token.type === "comment") {
                    return createTagInfo();
                } else if (!tagPrefixedRegExp.test(ctx.token.type) && ctx.token.string !== "=") {
                    // If it wasn't the tag name, assume it was an attr value
                    // Also we don't handle the "=" here.
                    tagInfo = _getTagInfoStartingFromAttrValue(ctx);

                    // Check to see if this is the closing of a tag (either the start or end)
                    // or a comment tag.
                    if (ctx.token.type === "comment" ||
                            (tagPrefixedRegExp.test(ctx.token.type) &&
                            (ctx.token.string === ">" || ctx.token.string === "/>" ||
                                ctx.token.string === "</"))) {
                        return createTagInfo();
                    }

                    // If it wasn't an attr value, assume it was an empty attr (ie. attr with no value)
                    if (!tagInfo.tagName) {
                        tagInfo = _getTagInfoStartingFromAttrName(ctx, true);
                    }

                    // We don't want to give context for the previous attr
                    // and we want it to look like the user is going to add a new attr
                    if (tagInfo.tagName) {
                        return createTagInfo(ATTR_NAME, 0, tagInfo.tagName);
                    }
                    return createTagInfo();
                }

                // We know the tag was here, so the user is adding an attr name
                tokenType = ATTR_NAME;
                offset = 0;
            }
        }

        if (tagPrefixedRegExp.test(ctx.token.type)) {
            if (ctx.token.type !== "tag bracket") {
                // Check if the user just typed a white space after "<" that made an existing tag invalid.
                if (TokenUtils.movePrevToken(ctx) && !/\S/.test(ctx.token.string)) {
                    return createTagInfo();
                }

                // Check to see if this is a closing tag
                if (ctx.token.type === "tag bracket" && ctx.token.string === "</") {
                    tokenType = CLOSING_TAG;
                }

                // Restore the original ctx by moving back to next context since we call
                // movePrevToken above to detect "<" or "</".
                TokenUtils.moveNextToken(ctx);
            }

            // Check to see if this is the closing of a start tag or a self closing tag
            if (ctx.token.string === ">" || ctx.token.string === "/>") {
                return createTagInfo();
            }

            // Make sure the cursor is not after an equal sign or a quote before we report the context as a tag.
            if (ctx.token.string !== "=" && ctx.token.string.match(/^["']/) === null) {
                if (!tokenType) {
                    tokenType = TAG_NAME;
                    if (ctx.token.type === "tag bracket") {
                        // Check to see if this is a closing tag
                        if (ctx.token.string === "</") {
                            tokenType = CLOSING_TAG;
                            offset -= 2;
                        } else {
                            offset = 0;
                        }
                        // If the cursor is right after the "<" or "</", then
                        // move context to next one so that _extractTagName
                        // call below can get the tag name if there is one.
                        if (offset === 0) {
                            TokenUtils.moveNextToken(ctx);
                        }
                    }
                }

                // We're actually in the tag, just return that as we have no relevant
                // info about what attr is selected
                return createTagInfo(tokenType, offset, _extractTagName(ctx));
            }
        }

        if (ctx.token.string === "=") {
            // We could be between the attr and the value
            // Step back and check
            if (!TokenUtils.moveSkippingWhitespace(TokenUtils.movePrevToken, ctx) || ctx.token.type !== "attribute") {
                return createTagInfo();
            }

            // The "=" is added, time to hint for values
            tokenType = ATTR_VALUE;
            offset = 0;
        }

        if (ctx.token.type === "attribute") {
            tagInfo = _getTagInfoStartingFromAttrName(ctx, false);

            // If we're in attr value, then we may need to calculate the correct offset
            // from the beginning of the attribute value. If the cursor position is to
            // the left of attr value, then the offset is negative.
            // e.g. if the cursor is just to the right of the "=" in <a rel= "rtl", then
            // the offset will be -2.
            if (tagInfo.attr.quoteChar) {
                offset = constPos.ch - ctx.pos.ch;
            } else if (tokenType === ATTR_VALUE && (constPos.ch + 1) < ctx.pos.ch) {
                // The cursor may be right before an unquoted attribute or another attribute name.
                // Since we can't distinguish between them, we will empty the value so that the
                // caller can just insert a new attribute value.
                tagInfo.attr.value = "";
            }
        } else {
            // if we're not at a tag, "=", or attribute name, assume we're in the value
            tagInfo = _getTagInfoStartingFromAttrValue(ctx);
        }

        if (tokenType && tagInfo.tagName) {
            tagInfo.position.tokenType = tokenType;
            tagInfo.position.offset = offset;
        }

        return tagInfo;
    }



    /**
     * Returns an Array of info about all blocks whose token mode name matches that passed in,
     * in the given Editor's HTML document (assumes the Editor contains HTML text).
     * @param {!Editor} editor - the editor containing the HTML text
     * @param {string} modeName - the mode name of the tokens to look for
     * @return {Array.<{start:{line:number, ch:number}, end:{line:number, ch:number}, text:string}>}
     */
    function findBlocks(editor, modeName) {
        // Start scanning from beginning of file
        var ctx = TokenUtils.getInitialContext(editor._codeMirror, {line: 0, ch: 0}),
            blocks = [],
            currentBlock = null,
            inBlock = false,
            outerMode = editor._codeMirror.getMode(),
            tokenModeName,
            previousMode;

        while (TokenUtils.moveNextToken(ctx, false)) {
            tokenModeName = CodeMirror.innerMode(outerMode, ctx.token.state).mode.name;
            if (inBlock) {
                if (!currentBlock.end) {
                    // Handle empty blocks
                    currentBlock.end = currentBlock.start;
                }
                // Check for end of this block
                if (tokenModeName === previousMode) {
                    // currentBlock.end is already set to pos of the last token by now
                    currentBlock.text = editor.document.getRange(currentBlock.start, currentBlock.end);
                    inBlock = false;
                } else {
                    currentBlock.end = { line: ctx.pos.line, ch: ctx.pos.ch };
                }
            } else {
                // Check for start of a block
                if (tokenModeName === modeName) {
                    currentBlock = {
                        start: { line: ctx.pos.line, ch: ctx.pos.ch }
                    };
                    blocks.push(currentBlock);
                    inBlock = true;
                } else {
                    previousMode = tokenModeName;
                }
                // else, random token: ignore
            }
        }

        return blocks;
    }

    /**
     * Returns an Array of info about all <style> blocks in the given Editor's HTML document (assumes
     * the Editor contains HTML text).
     * @param {!Editor} editor
     * @return {Array.<{start:{line:number, ch:number}, end:{line:number, ch:number}, text:string}>}
     */
    function findStyleBlocks(editor) {
        return findBlocks(editor, "css");
    }


    // Define public API
    exports.TAG_NAME         = TAG_NAME;
    exports.CLOSING_TAG      = CLOSING_TAG;
    exports.ATTR_NAME        = ATTR_NAME;
    exports.ATTR_VALUE       = ATTR_VALUE;

    exports.getTagInfo       = getTagInfo;
    exports.getTagAttributes = getTagAttributes;
    //The createTagInfo is really only for the unit tests so they can make the same structure to
    //compare results with
    exports.createTagInfo   = createTagInfo;
    exports.findStyleBlocks = findStyleBlocks;
    exports.findBlocks      = findBlocks;
});