nlpodyssey/gotokenizers

View on GitHub
normalizedstring/normalizedstring.go

Summary

Maintainability
D
1 day
Test Coverage
// Copyright (c) 2020, NLP Odyssey Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package normalizedstring

import (
    "fmt"
    "github.com/nlpodyssey/gotokenizers/splitpattern"
    "github.com/nlpodyssey/gotokenizers/strutils"
    "strings"
    "unicode"
)

// NormalizedString takes care of processing an "original" string to modify
// it and obtain a "normalized" string.
//
// It keeps both versions of the string, alignments information between both
// of them, and provides an interface to retrieve ranges of each string,
// using offsets from any of them.
//
// It is possible to retrieve a part of the original string, by indexing it
// with offsets from the normalized one, and the other way around too.
// It is also possible to convert offsets from one referential to the other
// one easily.
type NormalizedString struct {
    // The original version of the string, before any modification.
    original string
    // The normalized version of the string, after all modifications.
    normalized string
    // Mapping from the normalized string to the original one: (start, end)
    // for each byte of the normalized string.
    alignments []AlignmentRange
    // If this NormalizedString is a slice of a bigger one, we keep track
    // of the missing part, so that we can still give offsets from this
    // original string.
    originalShift int
}

// AlignmentRange represents a (start, end) range for representing the
// alignments of a NormalizedString.
type AlignmentRange struct {
    // Start byte position, inclusive.
    start int
    // End byte position, exclusive.
    end int
}

// New returns a new NormalizedString.
func New(
    original string,
    normalized string,
    alignments []AlignmentRange,
    originalShift int,
) *NormalizedString {
    return &NormalizedString{
        original:      original,
        normalized:    normalized,
        alignments:    alignments,
        originalShift: originalShift,
    }
}

// FromString returns a new NormalizedString built from the given string.
func FromString(s string) *NormalizedString {
    return &NormalizedString{
        original:      s,
        normalized:    s,
        alignments:    alignmentsFromString(s),
        originalShift: 0,
    }
}

func alignmentsFromString(s string) []AlignmentRange {
    alignments := make([]AlignmentRange, 0, len(s))

    for runeIndex, r := range s {
        strRune := string(r)
        bytesLen := len(strRune)
        for byteIndex := 0; byteIndex < bytesLen; byteIndex++ {
            alignments = append(alignments, AlignmentRange{
                start: runeIndex,
                end:   runeIndex + bytesLen,
            })
        }
    }

    return alignments
}

// Get returns the "normalized" version of the NormalizedString.
func (ns *NormalizedString) Get() string {
    return ns.normalized
}

// GetOriginal returns the "original" version of the NormalizedString.
func (ns *NormalizedString) GetOriginal() string {
    return ns.original
}

// Len returns the length in bytes of the "normalized" string.
func (ns *NormalizedString) Len() int {
    return len(ns.normalized)
}

// OriginalLen returns the length in bytes of the "original" string.
func (ns *NormalizedString) OriginalLen() int {
    return len(ns.original)
}

// IsEmpty reports whether the "normalized" string is empty.
func (ns *NormalizedString) IsEmpty() bool {
    return len(ns.normalized) == 0
}

// OriginalOffsets returns the original offsets.
func (ns *NormalizedString) OriginalOffsets() strutils.ByteOffsets {
    return strutils.ByteOffsets{
        Start: ns.originalShift,
        End:   ns.originalShift + ns.OriginalLen(),
    }
}

// RuneChange is a single rune-based transformation that can be applied
// by NormalizedString.TransformRange
type RuneChange struct {
    // Rune is a rune of the new transformed "normalized" string.
    Rune rune

    // Change is a number representing how the Rune must be handled when
    // it is inserted in the new "normalized" string.

    // - "1" = this is a new rune
    // - "-N" = the rune is right before N removed runes
    // - "0" = this rune is replacing the existing one

    // Values greater than "1" are not allowed. To add multiple runes, each of
    // them must be represented by a separate RuneChange value, with Changes
    // set to "1".
    Change int
}

// TransformRange applies transformations to the current normalized version of
// the string, while updating the alignments.
//
// This method expect a slice of RuneChange, where each item corresponds to one
// rune of the new normalized string.
//
// Since it is possible that the normalized string doesn't include some of the
// characters at the beginning of the original one, we need an initialOffset
// which represents the number of removed chars at the very beginning.
func (ns *NormalizedString) TransformRange(
    rng Range,
    dest []RuneChange,
    initialOffset int,
) {
    normalizedRange, normalizedRangeOk := ns.CoerceRangeToNormalized(rng)
    if !normalizedRangeOk {
        return
    }

    // Retrieve the original characters that are being replaced. This lets us
    // compute the change in byte sizes along the way.
    replacedNormalizedString := ns.normalized[normalizedRange.Start():normalizedRange.End()]

    initialRemoved := len(replacedNormalizedString[:initialOffset])
    replacedNormalizedRunes := []rune(replacedNormalizedString[initialOffset:])

    offset := initialRemoved + normalizedRange.Start()
    alignments := make([]AlignmentRange, 0, normalizedRange.Len())

    var normalizedBuilder strings.Builder

    for _, runeChange := range dest {
        idx := offset
        var align AlignmentRange

        if runeChange.Change >= 1 {
            if idx < 1 {
                align = AlignmentRange{start: 0, end: 0}
            } else {
                // This is a newly inserted character, so it shares the same
                // alignment than the previous one.
                align = ns.alignments[idx-1]
            }
        } else {
            align = ns.alignments[idx]
        }

        // If we are replacing a character, find it and compute the change in size
        replacedRuneSize := 0
        if runeChange.Change <= 0 {
            replacedRune := replacedNormalizedRunes[0]
            replacedNormalizedRunes = replacedNormalizedRunes[1:]
            replacedRuneSize = len(string(replacedRune))
        }

        // If we are removing some characters, find them too
        totalBytesToRemove := 0
        if runeChange.Change <= -1 {
            totalBytesToRemove = len(string(replacedNormalizedRunes[:-runeChange.Change]))
            replacedNormalizedRunes = replacedNormalizedRunes[-runeChange.Change:]
        }

        // Keep track of the changes for next offsets
        offset += replacedRuneSize
        offset += totalBytesToRemove

        newBytesCount := len(string(runeChange.Rune))
        for i := 0; i < newBytesCount; i++ {
            alignments = append(alignments, align)
        }

        // Then we keep only the rune for string reconstruction
        normalizedBuilder.WriteRune(runeChange.Rune)
    }

    newAlignments := make([]AlignmentRange, 0, len(ns.alignments)-normalizedRange.Len()+len(alignments))
    newAlignments = append(newAlignments, ns.alignments[:normalizedRange.start]...)
    newAlignments = append(newAlignments, alignments...)
    newAlignments = append(newAlignments, ns.alignments[normalizedRange.end:]...)
    ns.alignments = newAlignments

    ns.normalized = fmt.Sprintf("%s%s%s",
        ns.normalized[:normalizedRange.start],
        normalizedBuilder.String(),
        ns.normalized[normalizedRange.end:],
    )
}

// Transform applies transformations to the current normalized version of
// the string, while updating the alignments.
//
// It is the same as calling TransformRange with the full normalized string range.
func (ns *NormalizedString) Transform(dest []RuneChange, initialOffset int) {
    ns.TransformRange(NewNormalizedRange(0, ns.Len()), dest, initialOffset)
}

// OriginalAlignments recalculates the original alignments.
func (ns *NormalizedString) OriginalAlignments() []AlignmentRange {
    // (start, end) are in alignments
    // (offset, length) are in originalAlignments
    originalAlignments := make([]AlignmentRange, 0, len(ns.original))

    // Eventual gap before first group

    if start := ns.alignments[0].start; start > 0 {
        for i := 0; i < start; i++ {
            originalAlignments = append(originalAlignments, AlignmentRange{
                start: 0,
                end:   0,
            })
        }
    }

    last := ns.alignments[0]
    offset := 0
    length := 0

    for _, alignment := range ns.alignments {
        if last == alignment {
            // This is the same group
            length++
        } else {
            // This is a new group
            if alignment.start < last.end {
                panic("Invalid overlapping ranges.")
            }

            // Add the old group
            lastLen := last.end - last.start
            for i := 0; i < lastLen; i++ {
                originalAlignments = append(originalAlignments, AlignmentRange{
                    start: offset,
                    end:   offset + length,
                })
            }
            offset += length
            length = 1

            // Eventual gap between the 2 groups
            gapLen := alignment.start - last.end
            for i := 0; i < gapLen; i++ {
                originalAlignments = append(originalAlignments, AlignmentRange{
                    start: offset,
                    end:   offset,
                })
            }
        }

        last = alignment
    }

    // Add the last group
    lastLen := last.end - last.start
    for i := 0; i < lastLen; i++ {
        originalAlignments = append(originalAlignments, AlignmentRange{
            start: offset,
            end:   offset + length,
        })
    }

    // Add eventual last gap
    offset += length

    lastGapLen := len(ns.original) - len(originalAlignments)
    for i := 0; i < lastGapLen; i++ {
        originalAlignments = append(originalAlignments, AlignmentRange{
            start: offset,
            end:   offset,
        })
    }

    return originalAlignments
}

// GetRange returns a range of the normalized string, and a flag which reports
// whether the operation is successful or not.
func (ns *NormalizedString) GetRange(rng Range) (string, bool) {
    nr, ok := ns.CoerceRangeToNormalized(rng)
    if !ok {
        return "", false
    }
    return ns.normalized[nr.Start():nr.End()], true
}

// GetOriginalRange returns a range of the original string, and a flag which
// reports whether the operation is successful or not.
func (ns *NormalizedString) GetOriginalRange(rng Range) (string, bool) {
    or, ok := ns.CoerceRangeToOriginal(rng)
    if !ok {
        return "", false
    }
    return ns.original[or.Start():or.End()], true
}

// CoerceRangeToNormalized coerces the given Range (either an OriginalRange or
// a NormalizedRange) to a NormalizedRange, performing a conversion if needed.
// It also returns a flag which reports whether the operation was successful.
//
// The operation is unsuccessful if the range is targeting something out of
// range.
func (ns *NormalizedString) CoerceRangeToNormalized(r Range) (NormalizedRange, bool) {
    // If the string range is already in the normalized referential, return it as it is
    if nr, isNormalized := r.(NormalizedRange); isNormalized {
        return nr, true
    }

    // If we target an empty range, let's return the same
    if r.Len() == 0 {
        return NewNormalizedRange(r.Start(), r.End()), true
    }

    // If the target goes reverse, return invalid status
    if r.Start() > r.End() {
        return NewNormalizedRange(0, 0), false
    }

    // If we target (0, 0) on an empty string, we want to expand to the entire equivalent
    if len(ns.original) == 0 && r.Start() == 0 && r.End() == 0 {
        return NewNormalizedRange(0, ns.Len()), true
    }

    start := -1
    end := -1

    for i, alignment := range ns.alignments {
        if alignment.end > r.End() {
            break
        }

        if start == -1 && alignment.start >= r.Start() {
            // For now, don't update if width == 0
            if alignment.start != alignment.end {
                start = i
            }
        }

        if alignment.end <= r.End() {
            end = i + 1
        }
    }

    switch {
    case start != -1 && end != -1:
        return NewNormalizedRange(start, end), true
    case start != -1:
        return NewNormalizedRange(start, start), true
    case end != -1:
        return NewNormalizedRange(end, end), true
    default:
        return NewNormalizedRange(0, 0), false
    }
}

// CoerceRangeToOriginal coerces the given Range (either an OriginalRange or
// a NormalizedRange) to a OriginalRange, performing a conversion if needed.
// It also returns a flag which reports whether the operation was successful.
//
// The operation is unsuccessful if the range is targeting something out of
// range.
func (ns *NormalizedString) CoerceRangeToOriginal(r Range) (OriginalRange, bool) {
    // If the string range is already in the original referential, return it as it is
    if or, isOriginal := r.(OriginalRange); isOriginal {
        return or, true
    }

    // If we target an empty range, let's return the same
    if r.Len() == 0 {
        return NewOriginalRange(r.Start(), r.End()), true
    }

    // If the target goes reverse, return invalid status
    if r.Start() > r.End() {
        return NewOriginalRange(0, 0), false
    }

    // If we target (0, 0) on an empty string, we want to expand to the entire equivalent
    if len(ns.normalized) == 0 && r.Start() == 0 && r.End() == 0 {
        return NewOriginalRange(0, ns.OriginalLen()), true
    }

    alignments := ns.alignments[r.Start():r.End()]

    // Expand alignments, returning the range covered by this slice
    return NewOriginalRange(alignments[0].start, alignments[len(alignments)-1].end), true
}

// Prepend prepends the given string to the NormalizedString.
//
// FIXME: Prepend does nothing if the normalized string is empty
func (ns *NormalizedString) Prepend(s string) {
    if len(ns.normalized) == 0 {
        return
    }

    transformations := make([]RuneChange, 0, len(s)+1)

    for i, r := range s {
        change := 0
        if i > 0 {
            change = 1
        }
        transformations = append(transformations, RuneChange{Rune: r, Change: change})
    }

    firstRune := []rune(ns.normalized)[0]
    firstRuneBytesLen := len(string(firstRune))
    transformations = append(transformations, RuneChange{Rune: firstRune, Change: 1})

    ns.TransformRange(NewNormalizedRange(0, firstRuneBytesLen), transformations, 0)
}

// Append appends the given string to the NormalizedString.
//
// FIXME: Append does nothing if the normalized string is empty
func (ns *NormalizedString) Append(s string) {
    if len(ns.normalized) == 0 {
        return
    }

    transformations := make([]RuneChange, 0, len(s)+1)

    runes := []rune(ns.normalized)
    lastRuneIndex := len(runes) - 1
    lastRune := runes[lastRuneIndex]

    transformations = append(transformations, RuneChange{Rune: lastRune, Change: 0})

    for _, r := range s {
        transformations = append(transformations, RuneChange{Rune: r, Change: 1})
    }

    // FIXME: it's probably wrong to pass the rune index, instead of byte index
    ns.TransformRange(NewNormalizedRange(lastRuneIndex, ns.Len()), transformations, 0)
}

// SplitDelimiterBehavior is used by NormalizedString.Split to define the
// expected behavior for the delimiter.
//
// For example, when splitting on '-' with input `the-final--countdown`:
// - SplitDelimiterRemoved            => [ "the", "final", "countdown" ]
// - SplitDelimiterIsolated           => [ "the", "-", "final", "-", "-", "countdown" ]
// - SplitDelimiterMergedWithPrevious => [ "the-", "final-", "-", "countdown" ]
// - SplitDelimiterMergedWithNext     => [ "the", "-final", "-", "-countdown" ]
// - SplitDelimiterContiguous         => [ "the", "-", "final", "--", "countdown" ]
type SplitDelimiterBehavior uint8

const (
    SplitDelimiterRemoved            SplitDelimiterBehavior = iota
    SplitDelimiterIsolated                                  = iota
    SplitDelimiterMergedWithPrevious                        = iota
    SplitDelimiterMergedWithNext                            = iota
    SplitDelimiterContiguous                                = iota
)

// Split splits the current string in many subparts.
func (ns *NormalizedString) Split(
    pattern splitpattern.SplitPattern,
    behaviour SplitDelimiterBehavior,
) ([]*NormalizedString, error) {
    captures, err := pattern.FindMatches(ns.normalized)
    if err != nil {
        return nil, err
    }

    type SplitMatch struct {
        Offsets      strutils.ByteOffsets
        ShouldRemove bool
    }

    splits := make([]SplitMatch, 0, len(captures))

    // Process the matches according to the selected behavior
    switch behaviour {
    case SplitDelimiterRemoved:
        for _, capture := range captures {
            splits = append(splits, SplitMatch{
                Offsets:      capture.Offsets,
                ShouldRemove: capture.IsMatch,
            })
        }
    case SplitDelimiterIsolated:
        for _, capture := range captures {
            splits = append(splits, SplitMatch{
                Offsets:      capture.Offsets,
                ShouldRemove: false,
            })
        }
    case SplitDelimiterMergedWithPrevious:
        previousMatch := false
        for _, capture := range captures {
            if capture.IsMatch && !previousMatch && len(splits) > 0 {
                splits[len(splits)-1].Offsets.End = capture.Offsets.End
            } else {
                splits = append(splits, SplitMatch{
                    Offsets:      capture.Offsets,
                    ShouldRemove: false,
                })
            }
            previousMatch = capture.IsMatch
        }
    case SplitDelimiterMergedWithNext:
        previousMatch := false
        for i := len(captures) - 1; i >= 0; i-- {
            capture := captures[i]

            if capture.IsMatch && !previousMatch && len(splits) > 0 {
                splits[len(splits)-1].Offsets.Start = capture.Offsets.Start
            } else {
                splits = append(splits, SplitMatch{
                    Offsets:      capture.Offsets,
                    ShouldRemove: false,
                })
            }
            previousMatch = capture.IsMatch
        }
        // Reverse splits
        for i := len(splits)/2 - 1; i >= 0; i-- {
            opp := len(splits) - 1 - i
            splits[i], splits[opp] = splits[opp], splits[i]
        }
    case SplitDelimiterContiguous:
        previousMatch := false
        for _, capture := range captures {
            if capture.IsMatch == previousMatch && len(splits) > 0 {
                splits[len(splits)-1].Offsets.End = capture.Offsets.End
            } else {
                splits = append(splits, SplitMatch{
                    Offsets:      capture.Offsets,
                    ShouldRemove: false,
                })
            }
            previousMatch = capture.IsMatch
        }
    default:
        panic(fmt.Sprintf("unexpected SplitDelimiterBehavior: %d", behaviour))
    }

    // Then we split according to the computed splits

    result := make([]*NormalizedString, 0)
    for _, split := range splits {
        if split.ShouldRemove {
            continue
        }
        sliced, ok := ns.Slice(NewNormalizedRange(split.Offsets.Start, split.Offsets.End))
        if !ok {
            return nil, fmt.Errorf("NormalizedString bad split")
        }
        result = append(result, sliced)
    }
    return result, nil
}

// Replace replaces anything that matches the pattern with the given content.
func (ns *NormalizedString) Replace(
    pattern splitpattern.SplitPattern,
    content string,
) error {
    offset := 0

    captures, err := pattern.FindMatches(ns.normalized)
    if err != nil {
        return err
    }
    for _, capture := range captures {
        if !capture.IsMatch {
            continue
        }
        rStart := capture.Offsets.Start + offset
        if rStart < 0 {
            rStart = 0
        }
        rEnd := capture.Offsets.End + offset
        if rEnd < 0 {
            rEnd = 0
        }
        rng := NewNormalizedRange(rStart, rEnd)

        changes := make([]RuneChange, 0, len(content))
        for _, r := range content {
            changes = append(changes, RuneChange{Rune: r, Change: 1})
        }

        removedRunes := len([]rune(ns.normalized[rng.Start():rng.End()]))

        ns.TransformRange(rng, changes, removedRunes)

        newLen := len(content)
        oldLen := capture.Offsets.End - capture.Offsets.Start
        offset += newLen - oldLen
    }

    return nil
}

// Slice returns a slice of the current NormalizedString. It also returns a flag
// which reports whether the operation is successful or not.
func (ns *NormalizedString) Slice(rng Range) (*NormalizedString, bool) {
    if !ns.rangeIsValid(rng) {
        return nil, false
    }

    normalizedRange, nrOk := ns.CoerceRangeToNormalized(rng)
    if !nrOk {
        return nil, false
    }
    originalRange, orOk := ns.CoerceRangeToOriginal(rng)
    if !orOk {
        return nil, false
    }

    nShift := originalRange.Start()

    original, _ := ns.GetOriginalRange(rng)
    normalized, _ := ns.GetRange(rng)

    alignments := make([]AlignmentRange, normalizedRange.Len())

    for i, alignment := range ns.alignments[normalizedRange.start:normalizedRange.end] {
        alignments[i] = AlignmentRange{
            start: alignment.start - nShift,
            end:   alignment.end - nShift,
        }
    }

    originalShift := ns.originalShift + originalRange.start

    return New(original, normalized, alignments, originalShift), true
}

// Filter applies filtering over the characters of the NormalizedString.
func (ns *NormalizedString) Filter(keep func(rune) bool) {
    removed := 0
    removedStart := 0
    transforms := make([]RuneChange, 0, ns.Len())

    hasLastRune := false
    var lastRune rune

    for _, r := range ns.normalized {
        if !keep(r) {
            removed++
            continue
        }

        if hasLastRune {
            transforms = append(transforms, RuneChange{
                Rune:   lastRune,
                Change: -removed,
            })
        } else {
            removedStart = removed
        }

        lastRune = r
        hasLastRune = true
        removed = 0
    }

    if hasLastRune {
        transforms = append(transforms, RuneChange{
            Rune:   lastRune,
            Change: -removed,
        })
    }
    ns.Transform(transforms, removedStart)
}

// Map maps the characters of the NormalizedString.
func (ns *NormalizedString) Map(mapFunc func(rune) rune) {
    transforms := make([]RuneChange, 0, ns.Len())
    for _, r := range ns.normalized {
        transforms = append(transforms, RuneChange{
            Rune:   mapFunc(r),
            Change: 0,
        })
    }
    ns.Transform(transforms, 0)
}

// ToUpper remaps all Unicode letters of the "normalized" string to their
// upper case.
// FIXME: Go `unicode` package does not consider Unicode special casing
//        (see https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt)
//        As a result, every single rune is always transformed to a single
//        new rune. This is not the case in the original rust implementation.
//        Should we consider using another "better" package?
//        Explanatory example:
//        ```rust
//        fn main() {
//            let a = '\u{00DF}';
//            println!("{} {:?}", a, a.to_string().chars().count());
//            // => ß 1
//            let b = a.to_uppercase();
//            println!("{} {}", b, b.to_string().chars().count());
//            // => SS 2
//        }
//        ```
//        ```go
//        package main
//        import (
//               "fmt"
//               "unicode"
//        )
//        func main() {
//               a := '\u00DF'
//               fmt.Println(string(a), len([]rune(string(a))))
//            // => ß 1
//               b := unicode.ToUpper(a)
//               fmt.Println(string(b), len([]rune(string(b))))
//            // => ß 1
//        }
//        ```
func (ns *NormalizedString) ToUpper() {
    ns.Map(unicode.ToUpper)
}

// ToLower remaps all Unicode letters of the "normalized" string to their
// lower case.
// FIXME: see Unicode special casing notes on `NormalizedString.ToUpper()`
func (ns *NormalizedString) ToLower() {
    ns.Map(unicode.ToLower)
}

// Trim removes leading and trailing spaces from the "normalized" string.
func (ns *NormalizedString) Trim() {
    ns.TrimLeftRight(true, true)
}

// TrimLeft removes leading spaces from the "normalized" string.
func (ns *NormalizedString) TrimLeft() {
    ns.TrimLeftRight(true, false)
}

// TrimRight removes trailing spaces from the "normalized" string.
func (ns *NormalizedString) TrimRight() {
    ns.TrimLeftRight(false, true)
}

// TrimLeftRight removes leading (left) and/or trailing (right) spaces from
// the "normalized" string.
func (ns *NormalizedString) TrimLeftRight(left, right bool) {
    runes := []rune(ns.normalized)
    lenRunes := len(runes)

    leadingSpaces := 0
    if left {
        leadingSpaces = countLeadingSpaces(runes)
    }

    trailingSpaces := 0
    if right && leadingSpaces < lenRunes {
        trailingSpaces = countTrailingSpaces(runes)
    }

    if leadingSpaces == 0 && trailingSpaces == 0 {
        return
    }

    lastIndex := lenRunes - trailingSpaces - 1

    transforms := make([]RuneChange, 0, lastIndex-leadingSpaces+1)
    for i := leadingSpaces; i <= lastIndex; i++ {
        change := 0
        if i == lastIndex {
            change = -trailingSpaces
        }
        transforms = append(transforms, RuneChange{
            Rune:   runes[i],
            Change: change,
        })
    }
    ns.Transform(transforms, leadingSpaces)
}

// rangeIsValid validates the given range, to make sure it is on rune boundaries.
func (ns *NormalizedString) rangeIsValid(r Range) bool {
    var s string

    switch r.(type) {
    case OriginalRange:
        s = ns.original
    case NormalizedRange:
        s = ns.normalized
    default:
        panic(fmt.Sprintf("unexpected Range implementation: %#v", r))
    }

    return strutils.IsRuneBoundary(s, r.Start()) &&
        strutils.IsRuneBoundary(s, r.End())
}

// countLeadingSpaces returns the number of leading spaces in the given slice
// of runes, if any.
func countLeadingSpaces(runes []rune) int {
    for i, r := range runes {
        if !unicode.In(r, unicode.White_Space) {
            return i
        }
    }
    return len(runes)
}

// countTrailingSpaces returns the number of trailing spaces in the given slice
// of runes, if any.
func countTrailingSpaces(runes []rune) int {
    runesLen := len(runes)
    if runesLen == 0 {
        return 0
    }

    lastIndex := runesLen - 1
    _ = runes[lastIndex]

    for i := lastIndex; i >= 0; i-- {
        if !unicode.In(runes[i], unicode.White_Space) {
            return lastIndex - i
        }
    }

    return len(runes)
}