netdata/netdata

View on GitHub
src/go/collectors/go.d.plugin/pkg/matcher/glob.go

Summary

Maintainability
C
1 day
Test Coverage
// SPDX-License-Identifier: GPL-3.0-or-later

package matcher

import (
    "errors"
    "path/filepath"
    "regexp"
    "unicode/utf8"
)

// globMatcher implements Matcher, it uses filepath.MatchString to match.
type globMatcher string

var (
    errBadGlobPattern = errors.New("bad glob pattern")
    erGlobPattern     = regexp.MustCompile(`(?s)^(?:[*?]|\[\^?([^\\-\]]|\\.|.-.)+\]|\\.|[^\*\?\\\[])*$`)
)

// NewGlobMatcher create a new matcher with glob format
func NewGlobMatcher(expr string) (Matcher, error) {
    switch expr {
    case "":
        return stringFullMatcher(""), nil
    case "*":
        return TRUE(), nil
    }

    // any strings pass this regexp check are valid pattern
    if !erGlobPattern.MatchString(expr) {
        return nil, errBadGlobPattern
    }

    size := len(expr)
    chars := []rune(expr)
    startWith := true
    endWith := true
    startIdx := 0
    endIdx := size - 1
    if chars[startIdx] == '*' {
        startWith = false
        startIdx = 1
    }
    if chars[endIdx] == '*' {
        endWith = false
        endIdx--
    }

    unescapedExpr := make([]rune, 0, endIdx-startIdx+1)
    for i := startIdx; i <= endIdx; i++ {
        ch := chars[i]
        if ch == '\\' {
            nextCh := chars[i+1]
            unescapedExpr = append(unescapedExpr, nextCh)
            i++
        } else if isGlobMeta(ch) {
            return globMatcher(expr), nil
        } else {
            unescapedExpr = append(unescapedExpr, ch)
        }
    }

    return NewStringMatcher(string(unescapedExpr), startWith, endWith)
}

func isGlobMeta(ch rune) bool {
    switch ch {
    case '*', '?', '[':
        return true
    default:
        return false
    }
}

// Match matches.
func (m globMatcher) Match(b []byte) bool {
    return m.MatchString(string(b))
}

// MatchString matches.
func (m globMatcher) MatchString(line string) bool {
    rs, _ := m.globMatch(line)
    return rs
}

func (m globMatcher) globMatch(name string) (matched bool, err error) {
    pattern := string(m)
Pattern:
    for len(pattern) > 0 {
        var star bool
        var chunk string
        star, chunk, pattern = scanChunk(pattern)
        if star && chunk == "" {
            // Trailing * matches rest of string unless it has a /.
            // return !strings.Contains(name, string(Separator)), nil

            return true, nil
        }
        // Look for match at current position.
        t, ok, err := matchChunk(chunk, name)
        // if we're the last chunk, make sure we've exhausted the name
        // otherwise we'll give a false result even if we could still match
        // using the star
        if ok && (len(t) == 0 || len(pattern) > 0) {
            name = t
            continue
        }
        if err != nil {
            return false, err
        }
        if star {
            // Look for match skipping i+1 bytes.
            // Cannot skip /.
            for i := 0; i < len(name); i++ {
                //for i := 0; i < len(name) && name[i] != Separator; i++ {
                t, ok, err := matchChunk(chunk, name[i+1:])
                if ok {
                    // if we're the last chunk, make sure we exhausted the name
                    if len(pattern) == 0 && len(t) > 0 {
                        continue
                    }
                    name = t
                    continue Pattern
                }
                if err != nil {
                    return false, err
                }
            }
        }
        return false, nil
    }
    return len(name) == 0, nil
}

// scanChunk gets the next segment of pattern, which is a non-star string
// possibly preceded by a star.
func scanChunk(pattern string) (star bool, chunk, rest string) {
    for len(pattern) > 0 && pattern[0] == '*' {
        pattern = pattern[1:]
        star = true
    }
    inrange := false
    var i int
Scan:
    for i = 0; i < len(pattern); i++ {
        switch pattern[i] {
        case '\\':
            if i+1 < len(pattern) {
                i++
            }
        case '[':
            inrange = true
        case ']':
            inrange = false
        case '*':
            if !inrange {
                break Scan
            }
        }
    }
    return star, pattern[0:i], pattern[i:]
}

// matchChunk checks whether chunk matches the beginning of s.
// If so, it returns the remainder of s (after the match).
// Chunk is all single-character operators: literals, char classes, and ?.
func matchChunk(chunk, s string) (rest string, ok bool, err error) {
    for len(chunk) > 0 {
        if len(s) == 0 {
            return
        }
        switch chunk[0] {
        case '[':
            // character class
            r, n := utf8.DecodeRuneInString(s)
            s = s[n:]
            chunk = chunk[1:]
            // We can't end right after '[', we're expecting at least
            // a closing bracket and possibly a caret.
            if len(chunk) == 0 {
                err = filepath.ErrBadPattern
                return
            }
            // possibly negated
            negated := chunk[0] == '^'
            if negated {
                chunk = chunk[1:]
            }
            // parse all ranges
            match := false
            nrange := 0
            for {
                if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 {
                    chunk = chunk[1:]
                    break
                }
                var lo, hi rune
                if lo, chunk, err = getEsc(chunk); err != nil {
                    return
                }
                hi = lo
                if chunk[0] == '-' {
                    if hi, chunk, err = getEsc(chunk[1:]); err != nil {
                        return
                    }
                }
                if lo <= r && r <= hi {
                    match = true
                }
                nrange++
            }
            if match == negated {
                return
            }

        case '?':
            //if s[0] == Separator {
            //    return
            //}
            _, n := utf8.DecodeRuneInString(s)
            s = s[n:]
            chunk = chunk[1:]

        case '\\':
            chunk = chunk[1:]
            if len(chunk) == 0 {
                err = filepath.ErrBadPattern
                return
            }
            fallthrough

        default:
            if chunk[0] != s[0] {
                return
            }
            s = s[1:]
            chunk = chunk[1:]
        }
    }
    return s, true, nil
}

// getEsc gets a possibly-escaped character from chunk, for a character class.
func getEsc(chunk string) (r rune, nchunk string, err error) {
    if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
        err = filepath.ErrBadPattern
        return
    }
    if chunk[0] == '\\' {
        chunk = chunk[1:]
        if len(chunk) == 0 {
            err = filepath.ErrBadPattern
            return
        }
    }
    r, n := utf8.DecodeRuneInString(chunk)
    if r == utf8.RuneError && n == 1 {
        err = filepath.ErrBadPattern
    }
    nchunk = chunk[n:]
    if len(nchunk) == 0 {
        err = filepath.ErrBadPattern
    }
    return
}