gsantiago/subtitle.js

View on GitHub
src/Parser.ts

Summary

Maintainability
A
1 hr
Test Coverage
import stripBom from 'strip-bom'
import { Node, RE_TIMESTAMP, parseTimestamps } from '.'

/** Callback that receives each completed node emitted by the parser. */
export type Pusher = (node: Node) => void

/** Mutable bookkeeping that `Parser` carries from one input line to the next. */
export interface ParseState {
  /** Kind of line anticipated next; `Parser.parseLine` uses it to pick a handler. */
  expect: 'header' | 'id' | 'timestamp' | 'text' | 'vtt_comment'
  /** Row counter advanced by `Parser.parseLine`; row 0 is BOM-stripped and the value feeds error messages. */
  row: number
  /** False until the first non-blank line has been seen; blank lines before that are skipped. */
  hasContentStarted: boolean
  /** True once a header line starting with `WEBVTT` has been seen. */
  isWebVTT: boolean
  /** Node currently being assembled; reset to `{}` after each push. */
  node: Partial<Node>
  /** Raw lines collected for the current node; joined with `\n` when the node is pushed. */
  buffer: string[]
}

/**
 * Line-oriented subtitle parser.
 *
 * Handles index/timestamp/text cue blocks (SRT style) and switches to WebVTT
 * handling when the first content line starts with `WEBVTT`. Feed the input
 * one line at a time through `parseLine`, then call `flush` once the input is
 * exhausted. Every completed node is handed to the `push` callback.
 */
export class Parser {
  private push: Pusher
  private state: ParseState

  /**
   * @param options.push Callback invoked with each completed node.
   */
  constructor({ push }: { push: Pusher }) {
    this.push = push
    this.state = {
      expect: 'header',
      row: 0,
      hasContentStarted: false,
      isWebVTT: false,
      node: {},
      buffer: []
    }
  }

  /** True when the line, ignoring surrounding whitespace, consists only of digits. */
  private isIndex(line: string): boolean {
    return /^\d+$/.test(line.trim())
  }

  /** True when the line matches the shared timestamp pattern. */
  private isTimestamp(line: string): boolean {
    return RE_TIMESTAMP.test(line)
  }

  /** True when the line starts with the `NOTE` keyword that opens a WebVTT comment. */
  private isVttComment(line: string): boolean {
    return /^NOTE/.test(line)
  }

  /** True for buffer entries that count as blank when trimming a cue's text. */
  private isBlankEntry(entry: string): boolean {
    return entry === '' || entry === '\n'
  }

  /**
   * Builds the error reported for an unexpected line.
   *
   * @param expected Description of what the parser was waiting for.
   * @param index Zero-based row; reported one-based in the message.
   * @param row The offending line content.
   */
  private getError(expected: string, index: number, row: string): Error {
    return new Error(
      `expected ${expected} at row ${index + 1}, but received: "${row}"`
    )
  }

  /**
   * Consumes one line of input and advances the state machine.
   *
   * @param line A single line of the subtitle input, without its terminator.
   * @throws Error when a timestamp line was required but not found.
   */
  public parseLine(line: string): void {
    // A byte-order mark can only sit at the very start of the input.
    const contents = this.state.row === 0 ? stripBom(line) : line

    if (!this.state.hasContentStarted) {
      if (contents.trim() === '') {
        // Leading blank lines carry no content, but they still occupy a row:
        // count them so rows reported in errors match the input.
        this.state.row++
        return
      }

      this.state.hasContentStarted = true
    }

    const parse = {
      header: this.parseHeader,
      id: this.parseId,
      timestamp: this.parseTimestamp,
      text: this.parseText,
      vtt_comment: this.parseVttComment
    }[this.state.expect]

    parse.call(this, contents)

    this.state.row++
  }

  /** Emits the node still being assembled, if any lines were buffered for it. */
  public flush(): void {
    if (this.state.buffer.length > 0) {
      this.pushNode()
    }
  }

  /**
   * Handles lines while the parser is still in the header section.
   *
   * The first content line decides the format: a `WEBVTT` prefix starts a
   * header node, anything else is handed straight to `parseId`.
   */
  private parseHeader(line: string) {
    if (!this.state.isWebVTT) {
      this.state.isWebVTT = /^WEBVTT/.test(line)

      if (!this.state.isWebVTT) {
        this.parseId(line)
        return
      }

      this.state.node.type = 'header'
    }

    this.state.buffer.push(line)

    // An empty line terminates the header block.
    if (!line) {
      this.state.expect = 'id'
    }
  }

  /**
   * Handles the line that opens a block: emits a pending header, then accepts
   * a numeric index, a `NOTE` comment (WebVTT only), or a timestamp.
   */
  private parseId(line: string) {
    this.state.expect = 'timestamp'

    if (this.state.node.type === 'header') {
      this.pushNode()
    }

    if (this.isIndex(line)) return

    if (this.state.isWebVTT && this.isVttComment(line)) {
      this.state.expect = 'vtt_comment'
      return
    }

    // The index is optional, so this line may already be the timestamp.
    this.parseTimestamp(line)
  }

  /** Skips comment lines; a blank line ends the comment block. */
  private parseVttComment(line: string) {
    this.state.expect = line.trim() === '' ? 'id' : 'vtt_comment'
  }

  /**
   * Starts a new cue from a timestamp line.
   *
   * @throws Error when the line is not a timestamp.
   */
  private parseTimestamp(line: string) {
    if (!this.isTimestamp(line)) {
      throw this.getError('timestamp', this.state.row, line)
    }

    this.state.node = {
      type: 'cue',
      data: {
        ...parseTimestamps(line),
        text: ''
      }
    }

    this.state.expect = 'text'
  }

  /**
   * Collects text lines for the current cue and detects where the next block
   * begins.
   */
  private parseText(line: string) {
    // The first line after a timestamp always belongs to the cue.
    if (this.state.buffer.length === 0) {
      this.state.buffer.push(line)
      return
    }

    if (this.isTimestamp(line)) {
      const lastIndex = this.state.buffer.length - 1

      // A digits-only line right before a timestamp is the next cue's index,
      // not text of the current cue.
      if (this.isIndex(this.state.buffer[lastIndex])) {
        this.state.buffer.pop()
      }

      this.pushNode()
      this.parseTimestamp(line)
      return
    }

    // NOTE comments exist only in WebVTT. In other input a text line that
    // happens to start with "NOTE" is ordinary cue text (same rule as parseId).
    if (this.state.isWebVTT && this.isVttComment(line)) {
      this.pushNode()
      this.parseVttComment(line)
      return
    }

    this.state.buffer.push(line)
  }

  /**
   * Finalizes the node under construction from the buffered lines, hands it to
   * the `push` callback and resets node and buffer.
   */
  private pushNode(): void {
    const { buffer } = this.state

    if (this.state.node.type === 'cue') {
      // Strip blank entries from both ends; blank lines inside the text stay.
      while (buffer.length > 0 && this.isBlankEntry(buffer[buffer.length - 1])) {
        buffer.pop()
      }

      while (buffer.length > 0 && this.isBlankEntry(buffer[0])) {
        buffer.shift()
      }

      this.state.node.data!.text = buffer.join('\n')
    }

    if (this.state.node.type === 'header') {
      this.state.node.data = buffer.join('\n').trim()
    }

    this.push(this.state.node as Node)

    this.state.node = {}
    this.state.buffer = []
  }
}