angular/angular.js

View on GitHub
docs/config/processors/keywords.js

Summary

Maintainability
B
5 hrs
Test Coverage
'use strict';

var _ = require('lodash');
var fs = require('fs');
var path = require('canonical-path');

/**
 * @dgProcessor generateKeywordsProcessor
 * @description
 * This processor extracts all the keywords from each document and creates
 * a new document that will be rendered as a JavaScript file containing all
 * this data.
 */
module.exports = function generateKeywordsProcessor(log, readFilesProcessor) {
  return {
    ignoreWordsFile: undefined,
    areasToSearch: ['api', 'guide', 'misc', 'error', 'tutorial'],
    propertiesToIgnore: [],
    docTypesToIgnore: [],
    $validate: {
      ignoreWordsFile: { },
      areasToSearch: { presence: true },
      docTypesToIgnore: { },
      propertiesToIgnore: {  }
    },
    $runAfter: ['memberDocsProcessor'],
    $runBefore: ['rendering-docs'],
    $process: function(docs) {

      // Keywords to ignore
      var wordsToIgnore = [];
      var propertiesToIgnore;
      var docTypesToIgnore;
      var areasToSearch;

      // Keywords start with "ng:" or one of $, _ or a letter
      var KEYWORD_REGEX = /^((ng:|[$_a-z])[\w\-_]+)/;

      // Load up the keywords to ignore, if specified in the config
      if (this.ignoreWordsFile) {

        var ignoreWordsPath = path.resolve(readFilesProcessor.basePath, this.ignoreWordsFile);
        wordsToIgnore = fs.readFileSync(ignoreWordsPath, 'utf8').toString().split(/[,\s\n\r]+/gm);

        log.debug('Loaded ignore words from "' + ignoreWordsPath + '"');
        log.silly(wordsToIgnore);

      }

      areasToSearch = _.keyBy(this.areasToSearch);
      propertiesToIgnore = _.keyBy(this.propertiesToIgnore);
      log.debug('Properties to ignore', propertiesToIgnore);
      docTypesToIgnore = _.keyBy(this.docTypesToIgnore);
      log.debug('Doc types to ignore', docTypesToIgnore);

      var ignoreWordsMap = _.keyBy(wordsToIgnore);

      // If the title contains a name starting with ng, e.g. "ngController", then add the module name
      // without the ng to the title text, e.g. "controller".
      function extractTitleWords(title) {
        var match = /ng([A-Z]\w*)/.exec(title);
        if (match) {
          title = title + ' ' + match[1].toLowerCase();
        }
        return title;
      }

    function extractWords(text, words, keywordMap) {

      var tokens = text.toLowerCase().split(/[.\s,`'"#]+/mg);
      _.forEach(tokens, function(token) {
        var match = token.match(KEYWORD_REGEX);
        if (match) {
          var key = match[1];
          if (!keywordMap[key]) {
            keywordMap[key] = true;
            words.push(key);
          }
          }
        });
      }


      // We are only interested in docs that live in the right area
      docs = _.filter(docs, function(doc) { return areasToSearch[doc.area]; });
      docs = _.filter(docs, function(doc) { return !docTypesToIgnore[doc.docType]; });

      _.forEach(docs, function(doc) {


        var words = [];
        var keywordMap = _.clone(ignoreWordsMap);
        var members = [];
        var membersMap = {};

        // Search each top level property of the document for search terms
        _.forEach(doc, function(value, key) {

          if (_.isString(value) && !propertiesToIgnore[key]) {
            extractWords(value, words, keywordMap);
          }

          if (key === 'methods' || key === 'properties' || key === 'events') {
            _.forEach(value, function(member) {
              extractWords(member.name, members, membersMap);
            });
          }
        });


        doc.searchTerms = {
          titleWords: extractTitleWords(doc.name),
          keywords: _.sortBy(words).join(' '),
          members: _.sortBy(members).join(' ')
        };

      });

    }
  };
};