nlpodyssey/gotokenizers

View on GitHub
splitpattern/string.go

Summary

Maintainability
A
0 mins
Test Coverage
// Copyright (c) 2020, NLP Odyssey Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package splitpattern

import (
    "github.com/nlpodyssey/gotokenizers/strutils"
    "regexp"
)

// StringSplitPattern is a SplitPattern built from a plain string.
type StringSplitPattern struct {
    // s is the string the pattern was created from.
    s string
    // r is the regexp-based pattern that matches s literally; FromString
    // leaves it nil when s is empty.
    r *RegexpSplitPattern
}

// Compile-time check that *StringSplitPattern implements SplitPattern.
var _ SplitPattern = (*StringSplitPattern)(nil)

// FromString creates a StringSplitPattern from the string s.
//
// A non-empty s is escaped with regexp.QuoteMeta and compiled, so it is
// matched literally through a RegexpSplitPattern. For an empty s no regexp
// is built and the pattern's regexp field stays nil.
func FromString(s string) *StringSplitPattern {
    if s == "" {
        // Nothing to compile for an empty pattern.
        return &StringSplitPattern{s: s}
    }
    literal := regexp.MustCompile(regexp.QuoteMeta(s))
    return &StringSplitPattern{s: s, r: FromRegexp(literal)}
}

// FindMatches returns the captures of the pattern found inside s.
//
// When no underlying regexp pattern is set (as FromString does for an empty
// string), a single non-matching capture covering the whole of s is returned.
// Otherwise the call is delegated to the underlying RegexpSplitPattern and
// its result and error are returned unchanged.
func (sp *StringSplitPattern) FindMatches(s string) ([]Capture, error) {
    if sp.r == nil {
        // If we try to find the matches with an empty string, just don't
        // match anything: report the whole input as one non-matching capture.
        // The offsets are stored in strutils.ByteOffsets, so the end must be
        // the byte length of s; a rune count would fall short of the end of
        // the string whenever s contains multi-byte UTF-8 characters.
        return []Capture{{
            Offsets: strutils.ByteOffsets{Start: 0, End: len(s)},
            IsMatch: false,
        }}, nil
    }
    return sp.r.FindMatches(s)
}