david-mccullars/text_rank

View on GitHub
lib/text_rank/tokenizer/number.rb

Summary

Maintainability
A
0 mins
Test Coverage
module TextRank
  module Tokenizer

    ##
    # A tokenizer regex that preserves (optionally formatted) numbers as a single token.
    #
    # The pattern is one capturing group containing four alternatives, tried in
    # the order written:
    #
    # 1. An ungrouped number of four or more digits starting with 1-9, with an
    #    optional decimal fraction (e.g. "453231162", "453231162.17").
    # 2. One to three digits starting with 1-9, followed by zero or more
    #    ",ddd" thousands groups and an optional decimal fraction
    #    (e.g. "453", "453,231,162", "453,231,162.17").
    # 3. A single "0" with an optional decimal fraction (e.g. "0", "0.17").
    # 4. A bare decimal fraction with no integer part (e.g. ".17").
    #
    # Things the pattern deliberately does not cover, as written:
    #
    # * No sign is matched; a leading "+" or "-" is left outside the token.
    # * There are no anchors or word boundaries, so it matches numbers
    #   anywhere inside a longer string.
    # * Multi-digit integers never start with "0": only the lone-zero
    #   alternative accepts a leading zero, and it consumes just that digit
    #   (plus an optional fraction).
    #
    # The literal uses the +x+ (extended) flag, so the whitespace and the
    # trailing "# ..." examples inside it are ignored by the regex engine.
    ##
    # rubocop:disable Naming/ConstantName
    Number = /
      (
        [1-9]\d{3,}       # 453231162
        (?:\.\d+)?        # 453231162.17

        |

        [1-9]\d{0,2}      # 453
        (?:,\d{3})*       # 453,231,162
        (?:\.\d+)?        # 453,231,162.17

        |

        0                 # 0
        (?:\.\d+)?        # 0.17

        |

        (?:\.\d+)         # .17
      )
    /x
    # rubocop:enable Naming/ConstantName

  end
end