src/go/collectors/go.d.plugin/pkg/matcher/glob.go
// SPDX-License-Identifier: GPL-3.0-or-later
package matcher
import (
"errors"
"path/filepath"
"regexp"
"unicode/utf8"
)
// globMatcher implements Matcher for shell-style glob patterns.
// The underlying string is the raw pattern. Matching is performed by the
// package-local globMatch method, in which path separators get no special
// treatment.
type globMatcher string
var (
// errBadGlobPattern is returned by NewGlobMatcher when an expression fails
// the erGlobPattern syntax check.
errBadGlobPattern = errors.New("bad glob pattern")
// erGlobPattern validates a complete glob expression. It accepts any
// sequence of: '*' or '?' wildcards, bracketed character classes
// (optionally negated with '^'), backslash-escaped characters, and literal
// characters other than '*', '?', '\' and '['. The (?s) flag lets '.' match
// newlines too.
erGlobPattern = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+\]|\\.|[^\*\?\\\[])*$`)
)
// NewGlobMatcher creates a Matcher from a glob expression.
//
// Supported syntax: '*' (any sequence of characters), '?' (any single
// character), bracketed character classes such as [a-z] or [^0-9], and
// backslash escapes for literal metacharacters.
//
// Expressions that do not need real glob matching are handled specially:
//   - "" yields stringFullMatcher("").
//   - "*" yields TRUE().
//   - An expression whose only unescaped metacharacters are a leading and/or
//     trailing '*' is unescaped and handed to NewStringMatcher together with
//     the startWith/endWith flags (presumably selecting full, prefix, suffix
//     or partial string matching - confirm against NewStringMatcher).
//
// Every other valid expression becomes a globMatcher. An expression rejected
// by erGlobPattern yields errBadGlobPattern.
func NewGlobMatcher(expr string) (Matcher, error) {
	switch expr {
	case "":
		return stringFullMatcher(""), nil
	case "*":
		return TRUE(), nil
	}

	// Any string that passes this regexp check is a valid pattern.
	if !erGlobPattern.MatchString(expr) {
		return nil, errBadGlobPattern
	}

	// Index by rune count, not byte length: chars is a rune slice, so using
	// len(expr) would run past its end for any expression that contains
	// multi-byte characters.
	chars := []rune(expr)
	startWith := true
	endWith := true
	startIdx := 0
	endIdx := len(chars) - 1

	if chars[startIdx] == '*' {
		startWith = false
		startIdx = 1
	}
	if chars[endIdx] == '*' {
		// The trailing '*' is a wildcard only if it is not escaped, i.e. it
		// is preceded by an even number of backslashes (`a\*` ends with a
		// literal star, `a\\*` ends with a literal backslash and a wildcard).
		backslashes := 0
		for j := endIdx - 1; j >= 0 && chars[j] == '\\'; j-- {
			backslashes++
		}
		if backslashes%2 == 0 {
			endWith = false
			endIdx--
		}
	}

	unescapedExpr := make([]rune, 0, endIdx-startIdx+1)
	for i := startIdx; i <= endIdx; i++ {
		ch := chars[i]
		switch {
		case ch == '\\':
			// The regexp check guarantees that a backslash is always followed
			// by another character; keep that character literally.
			i++
			unescapedExpr = append(unescapedExpr, chars[i])
		case isGlobMeta(ch):
			// A metacharacter in the middle: full glob matching is required.
			return globMatcher(expr), nil
		default:
			unescapedExpr = append(unescapedExpr, ch)
		}
	}

	return NewStringMatcher(string(unescapedExpr), startWith, endWith)
}
// isGlobMeta reports whether ch is one of the glob metacharacters
// '*', '?' or '['.
func isGlobMeta(ch rune) bool {
	return ch == '*' || ch == '?' || ch == '['
}
// Match reports whether the byte slice b, interpreted as a string, matches
// the glob pattern. It delegates to MatchString.
func (m globMatcher) Match(b []byte) bool {
	line := string(b)
	return m.MatchString(line)
}
// MatchString reports whether line matches the glob pattern.
// A pattern error raised during matching is reported as "no match".
func (m globMatcher) MatchString(line string) bool {
	// globMatch returns false alongside every non-nil error, so dropping the
	// error here cannot turn a failure into a match.
	if matched, err := m.globMatch(line); err == nil {
		return matched
	}
	return false
}
// globMatch reports whether the whole of name matches the glob pattern m.
//
// The pattern is consumed chunk by chunk (see scanChunk). A chunk without a
// leading star must match at the current position of name. A chunk with a
// leading star may match at any later position; the leftmost position that
// works is taken. The last chunk must additionally consume all of name.
//
// Path separators are not special here: '*' and '?' match them like any other
// character.
//
// When matchChunk reports a malformed chunk, matched is false and err is the
// error it returned.
func (m globMatcher) globMatch(name string) (matched bool, err error) {
	pattern := string(m)
Pattern:
	for len(pattern) > 0 {
		var star bool
		var chunk string
		star, chunk, pattern = scanChunk(pattern)
		if star && chunk == "" {
			// A trailing '*' matches whatever is left of name.
			return true, nil
		}
		// Look for a match at the current position.
		t, ok, err := matchChunk(chunk, name)
		// If this is the last chunk, name must be exhausted; otherwise we
		// would report a false result even though a star could still make
		// the chunk match further to the right.
		if ok && (len(t) == 0 || len(pattern) > 0) {
			name = t
			continue
		}
		if err != nil {
			return false, err
		}
		if star {
			// Let the star swallow one more character at a time and retry.
			// Advance by whole runes: resuming in the middle of a UTF-8
			// sequence would let '?' or a character class consume a stray
			// continuation byte as if it were a character of its own.
			for i := 0; i < len(name); {
				_, width := utf8.DecodeRuneInString(name[i:])
				i += width
				t, ok, err := matchChunk(chunk, name[i:])
				if ok {
					// If this is the last chunk, make sure name is exhausted.
					if len(pattern) == 0 && len(t) > 0 {
						continue
					}
					name = t
					continue Pattern
				}
				if err != nil {
					return false, err
				}
			}
		}
		return false, nil
	}
	return len(name) == 0, nil
}
// scanChunk splits off the next segment of pattern.
//
// Leading '*' characters are dropped and reported through star. chunk is the
// star-free text that follows, running up to (but not including) the next '*'
// that is neither backslash-escaped nor inside a bracketed class. rest is the
// remainder of pattern, starting at that '*'.
func scanChunk(pattern string) (star bool, chunk, rest string) {
	// Strip the run of leading stars.
	lead := 0
	for lead < len(pattern) && pattern[lead] == '*' {
		lead++
	}
	star = lead > 0
	pattern = pattern[lead:]

	inClass := false
	end := 0
	for end < len(pattern) {
		c := pattern[end]
		if c == '*' && !inClass {
			break
		}
		switch c {
		case '\\':
			// Step over the escaped character, if there is one.
			if end+1 < len(pattern) {
				end++
			}
		case '[':
			inClass = true
		case ']':
			inClass = false
		}
		end++
	}
	return star, pattern[:end], pattern[end:]
}
// matchChunk tries to match chunk against the beginning of s.
//
// chunk consists only of single-character operators: literals (optionally
// backslash-escaped), '?' and bracketed character classes.
//
// On success ok is true and rest is the part of s that follows the match.
// On a mismatch, or when s runs out before chunk does, ok is false and rest is
// empty. err is filepath.ErrBadPattern when chunk turns out to be malformed.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
	for len(chunk) > 0 {
		if len(s) == 0 {
			// Pattern left over, but nothing to match it against.
			return "", false, nil
		}
		switch first := chunk[0]; first {
		case '[':
			// A character class consumes exactly one rune of s.
			r, width := utf8.DecodeRuneInString(s)
			s = s[width:]
			chunk = chunk[1:]
			// The class cannot end right after '[': at least a closing
			// bracket, and possibly a caret, must follow.
			if len(chunk) == 0 {
				return "", false, filepath.ErrBadPattern
			}
			negated := chunk[0] == '^'
			if negated {
				chunk = chunk[1:]
			}
			// Walk all items of the class; each item is either a single
			// character or a lo-hi range.
			inClass := false
			for items := 0; ; items++ {
				// ']' closes the class only after at least one item.
				if items > 0 && len(chunk) > 0 && chunk[0] == ']' {
					chunk = chunk[1:]
					break
				}
				var lo, hi rune
				lo, chunk, err = getEsc(chunk)
				if err != nil {
					return "", false, err
				}
				hi = lo
				if chunk[0] == '-' {
					hi, chunk, err = getEsc(chunk[1:])
					if err != nil {
						return "", false, err
					}
				}
				if lo <= r && r <= hi {
					inClass = true
				}
			}
			if inClass == negated {
				return "", false, nil
			}
		case '?':
			// Any single rune.
			_, width := utf8.DecodeRuneInString(s)
			s = s[width:]
			chunk = chunk[1:]
		default:
			if first == '\\' {
				// Drop the backslash and compare the escaped byte literally.
				chunk = chunk[1:]
				if len(chunk) == 0 {
					return "", false, filepath.ErrBadPattern
				}
			}
			if chunk[0] != s[0] {
				return "", false, nil
			}
			s = s[1:]
			chunk = chunk[1:]
		}
	}
	return s, true, nil
}
// getEsc reads one, possibly backslash-escaped, character from the front of
// chunk; it is used while parsing a character class.
//
// It returns the decoded rune and the remainder of chunk. err is
// filepath.ErrBadPattern when chunk is empty, starts with an unescaped '-' or
// ']', ends right after a backslash, starts with an invalid UTF-8 byte, or has
// nothing left after the character that was read.
func getEsc(chunk string) (r rune, nchunk string, err error) {
	if chunk == "" || chunk[0] == '-' || chunk[0] == ']' {
		return 0, "", filepath.ErrBadPattern
	}
	if chunk[0] == '\\' {
		chunk = chunk[1:]
		if chunk == "" {
			return 0, "", filepath.ErrBadPattern
		}
	}
	r, size := utf8.DecodeRuneInString(chunk)
	nchunk = chunk[size:]
	// Either an undecodable byte or a class that ends without its closing
	// bracket makes the pattern invalid; r and nchunk are still returned.
	if (r == utf8.RuneError && size == 1) || nchunk == "" {
		err = filepath.ErrBadPattern
	}
	return r, nchunk, err
}