sera1mu/deno-mecab

View on GitHub
src/MeCab.ts

Summary

Maintainability
A
1 hr
Test Coverage
A
93%
import { MeCabOptions, ParsedDumpWord, ParsedWord } from "./types.ts";

/**
 * The wrapper of MeCab.
 *
 * Each public method spawns the configured command once, feeds the given text
 * to its stdin and converts what the command prints on stdout into a
 * structured result.
 *
 * Requires `allow-run` permission.
 */
export default class MeCab {
  private readonly cmd: string[];
  private readonly options?: MeCabOptions;

  /**
   * @param cmd Command (and base arguments) used to launch MeCab.
   * @param options Optional settings; `cwd` and `env` are forwarded to the
   * spawned process.
   */
  constructor(cmd: string[], options?: MeCabOptions) {
    this.cmd = cmd;
    this.options = options;
  }

  /**
   * Write the whole of `text` to `stdin`, then close it.
   *
   * A single `write` call may accept fewer bytes than it was given, so the
   * remaining bytes are re-submitted until everything has been written.
   * `stdin` is closed even when a write fails.
   */
  private static async feedStdin(
    stdin: { write(p: Uint8Array): Promise<number>; close(): void },
    text: string,
  ): Promise<void> {
    const bytes = new TextEncoder().encode(text);
    try {
      let written = 0;
      while (written < bytes.length) {
        written += await stdin.write(bytes.subarray(written));
      }
    } finally {
      stdin.close();
    }
  }

  /**
   * Run MeCab and return stdout.
   *
   * Requires `allow-run` permission.
   *
   * @param text Text written to the process' stdin.
   * @param cmdArgs Extra arguments appended to the configured command.
   * @returns The decoded stdout of the process.
   * @throws Error when the process exits with a non-zero code; the message
   * contains the exit code and the decoded stdout.
   */
  private async runMeCab(text: string, cmdArgs?: string[]): Promise<string> {
    const options: Deno.RunOptions = {
      cmd: cmdArgs ? this.cmd.concat(cmdArgs) : this.cmd,
      cwd: this.options?.cwd,
      env: this.options?.env,
      stdout: "piped",
      stdin: "piped",
    };

    const process = Deno.run(options);

    try {
      // Feed stdin and drain stdout at the same time: writing everything
      // first could stall once the child's output pipe fills up.
      const [, { code }, stdout] = await Promise.all([
        process.stdin
          ? MeCab.feedStdin(process.stdin, text)
          : Promise.resolve(),
        process.status(),
        process.output(),
      ]);

      // Decode before building the error message; interpolating the raw
      // byte array would print comma-separated numbers instead of text.
      const decoded = new TextDecoder().decode(stdout);

      if (code !== 0) {
        throw new Error(
          `MeCab exited with code ${code}. MeCab stdout: ${decoded}`,
        );
      }

      return decoded;
    } finally {
      // Release the process handle on success and on failure alike.
      process.close();
    }
  }

  /**
   * Parse text.
   *
   * Every output line of the form `surface<TAB>f0,f1,...` becomes one word.
   * `EOS` marker lines and blank lines are skipped, so multi-line and empty
   * input are handled. Fields missing from a line are left `undefined`.
   *
   * Requires `allow-run` permission.
   */
  async parse(text: string): Promise<ParsedWord[]> {
    const result = await this.runMeCab(text);

    const parsedWords: ParsedWord[] = [];

    for (const line of result.split("\n")) {
      // Skip sentence terminators and blank lines.
      if (line === "" || line === "EOS") continue;

      // Split the surface off at the first tab so that a surface which is
      // itself a comma is not confused with the feature separators. Lines
      // without a tab fall back to splitting at the first comma.
      const tabIndex = line.indexOf("\t");
      const cut = tabIndex !== -1 ? tabIndex : line.indexOf(",");
      const surface = cut === -1 ? line : line.slice(0, cut);
      const features = cut === -1
        ? []
        : line.slice(cut + 1).replace(/\t/g, ",").split(",");

      const word: ParsedWord = {
        surface,
        feature: features[0],
        featureDetails: [features[1], features[2], features[3]],
        conjugationForms: [features[4], features[5]],
        originalForm: features[6],
        reading: features[7],
        pronunciation: features[8],
      };
      parsedWords.push(word);
    }
    return parsedWords;
  }

  /**
   * Get a dump of text.
   *
   * Runs the command with `-Odump` and converts every non-blank output line
   * (space-separated columns, the third one being the comma-separated
   * feature list) into a `ParsedDumpWord`.
   *
   * Requires `allow-run` permission.
   */
  async dump(text: string): Promise<ParsedDumpWord[]> {
    const result = await this.runMeCab(text, ["-Odump"]);

    const parsedWords: ParsedDumpWord[] = [];

    for (const line of result.split("\n")) {
      // Blank lines (including the one after the final newline) carry no node.
      if (line === "") continue;

      const splitedLine = line.split(" ");
      const splitedLineFeature = splitedLine[2].split(",");
      const word: ParsedDumpWord = {
        nodeId: Number(splitedLine[0]),
        surface: splitedLine[1],
        feature: splitedLineFeature[0],
        featureDetails: [
          splitedLineFeature[1],
          splitedLineFeature[2],
          splitedLineFeature[3],
        ],
        conjugationForms: [splitedLineFeature[4], splitedLineFeature[5]],
        originalForm: splitedLineFeature[6],
        reading: splitedLineFeature[7],
        pronunciation: splitedLineFeature[8],
        characterStartByte: Number(splitedLine[3]),
        characterEndByte: Number(splitedLine[4]),
        rcAttr: Number(splitedLine[5]),
        lcAttr: Number(splitedLine[6]),
        posId: Number(splitedLine[7]),
        characterType: Number(splitedLine[8]),
        status: Number(splitedLine[9]),
        isBest: Number(splitedLine[10]),
        alpha: Number(splitedLine[11]),
        beta: Number(splitedLine[12]),
        prob: Number(splitedLine[13]),
        cost: Number(splitedLine[14]),
      };
      parsedWords.push(word);
    }

    return parsedWords;
  }

  /**
   * Split text into words.
   *
   * Runs the command with `-Owakati` and returns the tokens of all output
   * lines as one flat list.
   *
   * Requires `allow-run` permission.
   */
  async wakati(text: string): Promise<string[]> {
    const result = await this.runMeCab(text, ["-Owakati"]);

    // Tokens are separated by ASCII spaces and lines by newlines; splitting
    // on both keeps multi-line output from leaking "\n" into the tokens.
    return result.split(/[ \r\n]+/).filter((token) => token !== "");
  }

  /**
   * Add reading to text.
   *
   * Runs the command with `-Oyomi` and returns its output with every
   * "space followed by newline" sequence removed.
   *
   * Requires `allow-run` permission.
   */
  async yomi(text: string): Promise<string> {
    const result = await this.runMeCab(text, ["-Oyomi"]);

    const editedResult = result.replace(/ \n/g, "");
    return editedResult;
  }
}