index.js

Summary

Maintainability
B
5 hrs
Test Coverage
/**
 * @file index.js
 * @project anne
 * @license GPLv3.
 * @copyright 2015 Online Health Database.
 */

"use strict";

var metaphone = require('natural').Metaphone

    /**
     * The general class used by Anne.
     * @class Anne
     */
  , Anne = function () {
      this.dict = {}
    }

  , alpha = 'abcdefghijklmnopqrstuvwxyz'

    /**
     * Checks if two words are within a character distance of one.
     * @private
     * @method similar
     * @params {String} word - the original word
     * @params {String} word - the second word to check against
     * @returns {Boolean} isSimilar - whether or not the words are similar
     */
  , similar = function (wordA, wordB) {
      // if the lengths differ by more than one, then we instantly know the
      // difference is larger than one
      if ((wordB.length - wordA.length) > 1) return false

      // if this counter exceeds one at any point, we know that
      // the distance is too far to try and bridge it
      var matches = 0
        , ln = Math.min(wordA.length, wordB.length)
        , i

      // we check for letter matches, and every difference
      // is counted as one unless a surrounding character is similar
      for (i = 0; matches < 2 && i < ln; i += 1) {
        if (wordA[i] !== wordB[i]) {
          matches += wordA[i] === wordB[i - 1] || wordA[i] === wordB[i + 1] ? 1 : 0
        }
      }

      return matches < 2
    }

    /**
     * @private
     * @method possibles
     * @params {String} word - the word to find edits for
     * @returns {Array} possibilities - all edit possiblities of word
     */
  , possibles = function (word, anne) {
      var edits = {}
        , total = 0
        , i
        , j
        , add = function (poss) {
            if (!edits[poss]) {
              var f = anne.freq(poss)
              if (f > 0) {
                edits[poss] = f
                total += f
              }
            }
          }

      for (i = 0; i < word.length + 1; i += 1) {
        if (i > 0) {
          // handle deletes
          add(word.substr(0, i - 1) + word.substr(i))

          // handle transposes
          add(word.substr(0, i - 1) + word.substr(i, i + 1) + word.substr(i - 1, i) + word.substr(i + 1))
        }

        for (j = 0; j < alpha.length; j += 1) {
          // handle replaces
          if (i > 0) {
            add(word.substr(0, i - 1) + alpha[j] + word.substr(i))
          }

          // handle inserts
          add(word.substr(0, i) + alpha[j] + word.substr(i))
        }
      }

      return Object.keys(edits).map(function (poss) {
        return [poss, edits[poss] / total]
      })
    }

/**
 * Learn word frequencies from a sentence.
 * The sentence is split on whitespace; every token of two or more
 * characters that consists only of letters and apostrophes is stored in
 * the dictionary one character per level, with its count kept under the
 * `_` key of the last level. All other tokens (numbers, tokens carrying
 * punctuation or underscores, single characters) are skipped.
 * @memberof Anne
 * @method learn
 * @param {String} string - a string sentence or word to learn from.
 * @returns {Anne} this - the instance, to allow chaining
 */
Anne.prototype.learn = function (string) {
  var that = this

  // we split by '\s' instead of '\W' so that apostrophes stay part of
  // their words
  string.split(/\s+/g).forEach(function (word) {
    // only pay attention to proper words; the pattern is anchored because
    // an unanchored `[a-z']*` also matches the empty string and therefore
    // accepts everything, including tokens containing the reserved `_`
    if (word.length < 2 || !/^[a-z']+$/i.test(word)) return

    // walk (and create where missing) one dictionary level per character,
    // so that lookups never have to visit the entire dictionary
    var node = that.dict
      , i

    for (i = 0; i < word.length; i += 1) {
      if (!node[word[i]]) {
        node[word[i]] = {}
      }

      node = node[word[i]]
    }

    // record word frequency
    node._ = (node._ || 0) + 1
  })

  // continue chaining
  return this
}

/**
 * Get the frequency count of a word.
 * Follows the dictionary one character at a time and reads the count
 * stored under the `_` key of the level reached by the last character.
 * @memberof Anne
 * @method freq
 * @param {String} word - the word to get the frequency of
 * @returns {Number} frequency - the frequency count of the word, 0 when
 *   the word has no recorded count
 */
Anne.prototype.freq = function (word) {
  var node = this.dict
    , i

  for (i = 0; i < word.length; i += 1) {
    node = node[word[i]]

    // the path stops here, so the word has never been recorded
    if (!node) return 0
  }

  // a level that is only a prefix of longer words carries no count of its
  // own; report it as 0 rather than undefined
  return typeof node._ === 'number' ? node._ : 0
}

/**
 * Fix all found spelling errors in a sentence.
 * The sentence is split on whitespace and every token of two or more
 * characters that consists only of letters and apostrophes is replaced by
 * its most probable phonetically matching candidate, if there is one.
 * All other tokens are passed through unchanged.
 * @memberof Anne
 * @method fix
 * @param {String} string - a string sentence you wish to fix.
 * @returns {String} fixed sentence - the tokens, fixed where possible,
 *   joined by single spaces.
 */
Anne.prototype.fix = function (string) {
  // split by spaces, and fix words individually
  var that = this
    , fixed = string.split(/\s+/g).map(function (word) {
        // we don't need to try to fix single letters, or any non-word
        // entities (i.e. numbers); the pattern is anchored because an
        // unanchored `[a-z']*` matches the empty string and would let
        // every token through
        if (word.length < 2 || !/^[a-z']+$/i.test(word)) return word

        // search through dictionary for known words and their
        // frequencies
        var possible = possibles(word, that)
          , phonetics = metaphone.process(word)

        // remove all non-phonetic possibilities, then sort to get the
        // best probability on top
        possible = possible.filter(function (test) {
          return metaphone.compare(word, test[0]) || similar(phonetics, metaphone.process(test[0]))
        }).sort(function (a, b) {
          return b[1] - a[1]
        })

        // simply return the final word found by the search
        return possible.length > 0 ? possible[0][0] : word
      })

  // merge by single whitespace, assuming that no other space
  // characters have been lost in the formatting
  return fixed.join(' ')
}

/**
 * Correct a sentence and afterwards learn from the sentence as it was
 * originally given (not from the corrected text).
 * @memberof Anne
 * @method fixAndLearn
 * @param {String} string - the sentence to fix and then learn
 * @returns {String} fixed sentence - the result of `fix` for the sentence
 */
Anne.prototype.fixAndLearn = function (string) {
  // fix first, so the correction is made before the dictionary grows
  var corrected = this.fix(string)

  this.learn(string)

  return corrected
}

/**
 * Expose the dictionary for serialization.
 * The dictionary object itself is returned, not a copy.
 * @memberof Anne
 * @method toJSON
 * @returns {Object} json - the dictionary object, which can be saved as a string
 */
Anne.prototype.toJSON = function () {
  var dictionary = this.dict

  return dictionary
}

/**
 * Replace the dictionary with one taken from a JSON object or string.
 * @memberof Anne
 * @method fromJSON
 * @param {Object|String} json - the dictionary to use; a string is parsed
 *   with the native JSON library first, anything else is used as given
 * @returns {Anne} this - the instance, to allow chaining
 */
Anne.prototype.fromJSON = function (json) {
  // only strings need deserializing
  var dictionary = typeof json === 'string' ? JSON.parse(json) : json

  // the previous dictionary is discarded, not merged
  this.dict = dictionary

  return this
}

// uses the same instance across imports
// at runtime
module.exports = new Anne