wtetsu/mouse-dictionary

View on GitHub
src/main/lib/traverser.js

Summary

Maintainability
A
0 mins
Test Coverage
/**
 * Mouse Dictionary (https://github.com/wtetsu/mouse-dictionary/)
 * Copyright 2018-present wtetsu
 * Licensed under MIT
 */

import dom from "./dom";
import decoy from "./decoy";
import ponyfill from "./ponyfill/ponyfill";

/**
 * Creates the "text under cursor" lookup function.
 *
 * @param {Function} [doConfirmValidCharacter] - Character classifier handed to Traverser.
 * @param {number} [maxWords] - Word limit handed to Traverser.
 * @returns {Function} (element, clientX, clientY) => array of candidate texts.
 *   The returned function never throws: errors are logged and yield [].
 */
const build = (doConfirmValidCharacter, maxWords) => {
  const traverser = new Traverser(doConfirmValidCharacter, maxWords);

  const getTextUnderCursor = (element, clientX, clientY) => {
    try {
      // A nullish result is normalized to an empty list.
      return traverser.fetchTextUnderCursor(element, clientX, clientY) ?? [];
    } catch (err) {
      console.error(err);
      return [];
    }
  };

  return getTextUnderCursor;
};

/**
 * Extracts candidate lookup texts located under a point of the document.
 *
 * Every fetch* method returns an array of strings (possibly empty).
 */
class Traverser {
  /**
   * @param {Function} [doGetTargetCharacterType] - Maps a UTF-16 code unit to a bit mask:
   *   bit 1 (value 1) lets the character extend the start of the text backward,
   *   bit 2 (value 2) lets it extend the end forward. Defaults to 3 for
   *   English-like characters and 0 for everything else.
   * @param {number} [maxWords] - Maximum number of space-separated words to capture (default 8).
   */
  constructor(doGetTargetCharacterType, maxWords) {
    // Maximum number of UTF-16 code units captured for non-English text.
    this.JA_MAX_LENGTH = 40;
    this.getTargetCharacterType = doGetTargetCharacterType ?? ((code) => (isEnglishLikeCharacter(code) ? 3 : 0));
    this.maxWords = maxWords ?? 8;
    this.decoy = decoy.create("div");
  }

  /**
   * Returns the candidate texts at the given client coordinates.
   *
   * @param {Element} element - Element under the cursor.
   * @param {number} clientX
   * @param {number} clientY
   * @returns {string[]} Candidate texts; empty when nothing can be resolved.
   */
  fetchTextUnderCursor(element, clientX, clientY) {
    const range = ponyfill.getCaretNodeAndOffsetFromPoint(element.ownerDocument, clientX, clientY);
    if (!range) {
      return [];
    }
    const { node, offset } = range;

    if (node.nodeType === Node.TEXT_NODE) {
      return this.fetchTextFromTextNode(node, offset);
    }

    if (node.nodeType === Node.ELEMENT_NODE) {
      return this.fetchTextFromElementNode(element, clientX, clientY);
    }

    return [];
  }

  /**
   * Builds the candidate texts starting from a text node and a caret offset.
   *
   * @param {Text} textNode - Node whose `data` is scanned.
   * @param {number} offset - Caret offset inside `textNode.data`.
   * @returns {string[]} One or two candidates; empty when the node has no text.
   */
  fetchTextFromTextNode(textNode, offset) {
    const { text, subText, end, isEnglish } = this.getTextFromRange(textNode.data, offset);
    if (text === undefined) {
      // getTextFromRange had nothing to work with (empty node): no candidates,
      // rather than a list holding `undefined`.
      return [];
    }
    const textList = subText ? [text, subText] : [text];
    if (!end) {
      return textList;
    }
    // The capture reached the end of this node, so continue with the text that follows it.
    const followingText = dom.traverse(textNode);
    return textList.map((t) => this.concatenate(t, followingText, isEnglish));
  }

  /**
   * Appends the following text and trims the result to the configured limit
   * (maxWords for English, JA_MAX_LENGTH code units otherwise).
   *
   * @param {string} text
   * @param {string} followingText
   * @param {boolean} isEnglish
   * @returns {string}
   */
  concatenate(text, followingText, isEnglish) {
    const concatenatedText = concatenateFollowingText(text, followingText, isEnglish);
    const endIndex = isEnglish
      ? searchEndIndex(concatenatedText, 0, this.maxWords, this.getTargetCharacterType)
      : this.JA_MAX_LENGTH;
    return concatenatedText.substring(0, endIndex);
  }

  /**
   * Retries the caret lookup while the decoy is active on `element`.
   * NOTE(review): presumably the decoy exposes text that the plain caret
   * lookup cannot reach — confirm against ./decoy.
   *
   * @param {Element} element
   * @param {number} clientX
   * @param {number} clientY
   * @returns {string[]} Candidate texts; empty when no text node is found.
   */
  fetchTextFromElementNode(element, clientX, clientY) {
    try {
      this.decoy.activate(element);

      const range = ponyfill.getCaretNodeAndOffsetFromPoint(element.ownerDocument, clientX, clientY);
      if (!range) {
        return [];
      }
      const { node, offset } = range;

      if (node.nodeType === Node.TEXT_NODE) {
        return this.fetchTextFromTextNode(node, offset);
      }
      return [];
    } finally {
      // Always undo the decoy, including on early returns and exceptions.
      this.decoy.deactivate();
    }
  }

  /**
   * Cuts the candidate text out of `sourceText` around `offset`.
   *
   * @param {string} sourceText
   * @param {number} offset - Index of the character under the cursor.
   * @returns {{text?: string, subText?: string, end?: boolean, isEnglish?: boolean}}
   *   `text` is the main candidate; `subText` (non-English only) starts exactly
   *   at `offset` when `text` starts elsewhere; `end` tells whether the capture
   *   reached the end of `sourceText`. An empty object is returned for empty input.
   */
  getTextFromRange(sourceText, offset) {
    if (!sourceText) {
      return {};
    }
    const isEnglish = isEnglishLikeCharacter(sourceText.charCodeAt(offset));

    let endIndex;
    let text;
    let subText;
    if (isEnglish) {
      const startIndex = searchStartIndex(sourceText, offset, this.getTargetCharacterType);
      endIndex = searchEndIndex(sourceText, offset, this.maxWords, this.getTargetCharacterType);
      text = sourceText.substring(startIndex, endIndex);
    } else {
      endIndex = offset + this.JA_MAX_LENGTH;

      // retrieveProperStartIndex expects a 1-based cursor position.
      const properStartIndex = retrieveProperStartIndex(sourceText, offset + 1);
      text = sourceText.substring(properStartIndex, endIndex);

      if (properStartIndex !== offset) {
        subText = sourceText.substring(offset, endIndex);
      }
    }
    const end = endIndex >= sourceText.length;
    return { text, subText, end, isEnglish };
  }
}

/**
 * Finds the start index of the token that contains the cursor.
 *
 * @param {string} sourceText - Text to tokenize (as "ja-JP").
 * @param {number} cursorIndex - 1-based position of the character under the cursor.
 * @returns {number} 0-based start index of the token covering the cursor;
 *   `cursorIndex` itself when no tokens are available; 0 when the cursor lies
 *   beyond every token.
 */
const retrieveProperStartIndex = (sourceText, cursorIndex) => {
  const tokens = tokenize(sourceText, "ja-JP");
  if (!tokens) {
    return cursorIndex;
  }

  let tokenStart = 0;
  for (const token of tokens) {
    const tokenEnd = tokenStart + token.length;
    if (cursorIndex <= tokenEnd) {
      return tokenStart;
    }
    tokenStart = tokenEnd;
  }
  return 0;
};

/**
 * Walks backward from `index` to find where the text under the cursor starts.
 *
 * @param {string} text
 * @param {number} index - Position to start scanning from (inclusive).
 * @param {Function} doGetCharacterType - Code unit => bit mask; bit 1 (value 1)
 *   means the character belongs to the text and the scan may go on.
 * @returns {number} Index just after the first non-matching character, or 0
 *   when the scan reaches the beginning of the text.
 */
const searchStartIndex = (text, index, doGetCharacterType) => {
  let position = index;
  while (doGetCharacterType(text.charCodeAt(position)) & 1) {
    if (position <= 0) {
      return 0;
    }
    position -= 1;
  }
  return position + 1;
};

/**
 * Walks forward from the character after `index` to find where the text ends.
 *
 * A run of consecutive spaces (0x20) counts as one separator; the scan stops
 * at the space that brings the separator count up to `maxWords`, at the first
 * non-space character whose type lacks bit 2 (value 2), or at the end of text.
 *
 * @param {string} text
 * @param {number} index - Scanning starts at `index + 1`.
 * @param {number} maxWords - Number of separators after which the scan stops.
 * @param {Function} doGetCharacterType - Code unit => bit mask.
 * @returns {number} Exclusive end index of the captured text.
 */
const searchEndIndex = (text, index, maxWords, doGetCharacterType) => {
  const SPACE = 0x20;
  let separatorCount = 0;
  let previousWasSpace = false;

  for (let position = index + 1; ; position += 1) {
    const code = text.charCodeAt(position);
    if (code !== SPACE) {
      if (!(doGetCharacterType(code) & 2)) {
        return position;
      }
      previousWasSpace = false;
    } else {
      if (!previousWasSpace) {
        separatorCount += 1;
      }
      previousWasSpace = true;
      if (separatorCount >= maxWords) {
        return position;
      }
    }
    if (position >= text.length) {
      return position;
    }
  }
};

/**
 * Joins `text` with the text that follows it in the document.
 *
 * English text is joined with a single space, unless the following text
 * starts with "-" (then it is glued directly). Non-English text is always
 * glued directly. A falsy `followingText` leaves `text` untouched.
 *
 * @param {string} text
 * @param {string} followingText
 * @param {boolean} isEnglish
 * @returns {string}
 */
const concatenateFollowingText = (text, followingText, isEnglish) => {
  if (!followingText) {
    return text;
  }
  const needsSpace = isEnglish && !followingText.startsWith("-");
  return needsSpace ? text + " " + followingText : text + followingText;
};

/**
 * Tells whether a UTF-16 code unit is in the range 0x20 (space) to 0x7e ("~"), inclusive.
 *
 * @param {number} code
 * @returns {boolean} false for NaN (e.g. charCodeAt out of range).
 */
const isEnglishLikeCharacter = (code) => code >= 0x20 && code <= 0x7e;

// Word tokenization.
// Intl.v8BreakIterator (non-standard, V8 only) is used when present; otherwise
// the standard Intl.Segmenter is used.
// https://github.com/tc39/proposal-intl-segmenter

/**
 * Tokenizes with the non-standard Intl.v8BreakIterator.
 *
 * @param {string} text
 * @param {string} lang - BCP 47 language tag.
 * @returns {string[]} Consecutive tokens that together cover `text`.
 */
const tokenizeWithBreakIterator = (text, lang) => {
  const it = Intl.v8BreakIterator([lang], { type: "word" });
  it.adoptText(text);

  const words = [];
  let cur = 0;
  while (cur < text.length) {
    const next = it.next();
    if (!(next > cur)) {
      // The iterator stopped advancing (it is expected to report -1 once it is
      // exhausted). Keep the remainder as one token instead of looping forever.
      words.push(text.substring(cur));
      break;
    }
    words.push(text.substring(cur, next));
    cur = next;
  }
  return words;
};

/**
 * Tokenizes with the standard Intl.Segmenter.
 *
 * @param {string} text
 * @param {string} lang - BCP 47 language tag.
 * @returns {string[]} Consecutive tokens that together cover `text`.
 */
const tokenizeWithSegmenter = (text, lang) => {
  const segmenter = new Intl.Segmenter([lang], { granularity: "word" });
  return Array.from(segmenter.segment(text), ({ segment }) => segment);
};

/**
 * Splits `text` into word-level tokens (words, spaces and punctuation are all
 * returned, in order).
 *
 * @param {string} text
 * @param {string} lang - BCP 47 language tag, e.g. "ja-JP".
 * @returns {string[]|null} Tokens, or null when the runtime offers no word segmentation API.
 */
const tokenize = (text, lang) => {
  if (typeof Intl === "undefined") {
    return null;
  }
  if (Intl.v8BreakIterator) {
    return tokenizeWithBreakIterator(text, lang);
  }
  if (Intl.Segmenter) {
    return tokenizeWithSegmenter(text, lang);
  }
  return null;
};

// Public entry point: `build` returns the function that extracts the text under the cursor.
export default { build };