src/go/plugin/go.d/modules/weblog/logline.go
// SPDX-License-Identifier: GPL-3.0-or-later
package weblog
import (
"errors"
"fmt"
"regexp"
"strconv"
"strings"
)
// TODO: it is not clear how to handle "-", current handling is not good
// In general it is:
// - If a field is unused in a particular entry dash "-" marks the omitted field.
// In addition to that "-" is used as zero value in:
// - apache: %b '-' when no bytes are sent.
//
// Log Format:
// - CLF: https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
// - ELF: https://www.w3.org/TR/WD-logfile.html
// - Apache CLF: https://httpd.apache.org/docs/trunk/logs.html#common
// Variables:
// - nginx: http://nginx.org/en/docs/varindex.html
// - apache: http://httpd.apache.org/docs/current/mod/mod_log_config.html#logformat
// - IIS: https://learn.microsoft.com/en-us/windows/win32/http/w3c-logging
/*
| nginx | apache | description |
|-------------------------|-----------|-----------------------------------------------|
| $host ($http_host) | %v | Name of the server which accepted a request.
| $server_port | %p | Port of the server which accepted a request.
| $scheme | - | Request scheme. "http" or "https".
| $remote_addr | %a (%h) | Client address.
| $request | %r | Full original request line. The line is "$request_method $request_uri $server_protocol".
| $request_method | %m | Request method. Usually "GET" or "POST".
| $request_uri | %U | Full original request URI.
| $server_protocol | %H | Request protocol. Usually "HTTP/1.0", "HTTP/1.1", or "HTTP/2.0".
| $status | %s (%>s) | Response status code.
| $request_length | %I | Bytes received from a client, including request and headers.
| $bytes_sent | %O | Bytes sent to a client, including request and headers.
| $body_bytes_sent | %B (%b) | Bytes sent to a client, not counting the response header.
| $request_time | %D | Request processing time.
| $upstream_response_time | - | Time spent on receiving the response from the upstream server.
| $ssl_protocol | - | Protocol of an established SSL connection.
| $ssl_cipher | - | String of ciphers used for an established SSL connection.
*/
var (
errEmptyLine = errors.New("empty line")
errBadVhost = errors.New("bad vhost")
errBadVhostPort = errors.New("bad vhost with port")
errBadPort = errors.New("bad port")
errBadReqScheme = errors.New("bad req scheme")
errBadReqClient = errors.New("bad req client")
errBadRequest = errors.New("bad request")
errBadReqMethod = errors.New("bad req method")
errBadReqURL = errors.New("bad req url")
errBadReqProto = errors.New("bad req protocol")
errBadReqSize = errors.New("bad req size")
errBadRespCode = errors.New("bad resp status code")
errBadRespSize = errors.New("bad resp size")
errBadReqProcTime = errors.New("bad req processing time")
errBadUpsRespTime = errors.New("bad upstream resp time")
errBadSSLProto = errors.New("bad ssl protocol")
errBadSSLCipherSuite = errors.New("bad ssl cipher suite")
)
func newEmptyLogLine() *logLine {
var l logLine
l.custom.fields = make(map[string]struct{})
l.custom.values = make([]customValue, 0, 20)
l.reset()
return &l
}
type (
logLine struct {
web
custom custom
}
web struct {
vhost string
port string
reqScheme string
reqClient string
reqMethod string
reqURL string
reqProto string
reqSize int
reqProcTime float64
respCode int
respSize int
upsRespTime float64
sslProto string
sslCipherSuite string
}
custom struct {
fields map[string]struct{}
values []customValue
}
customValue struct {
name string
value string
}
)
func (l *logLine) Assign(field string, value string) (err error) {
if value == "" {
return
}
switch field {
case "host", "http_host", "v":
err = l.assignVhost(value)
case "server_port", "p":
err = l.assignPort(value)
case "host:$server_port", "v:%p":
err = l.assignVhostWithPort(value)
case "scheme":
err = l.assignReqScheme(value)
case "remote_addr", "a", "h":
err = l.assignReqClient(value)
case "request", "r":
err = l.assignRequest(value)
case "request_method", "m":
err = l.assignReqMethod(value)
case "request_uri", "U":
err = l.assignReqURL(value)
case "server_protocol", "H":
err = l.assignReqProto(value)
case "status", "s", ">s":
err = l.assignRespCode(value)
case "request_length", "I":
err = l.assignReqSize(value)
case "bytes_sent", "body_bytes_sent", "b", "O", "B":
err = l.assignRespSize(value)
case "request_time", "D":
err = l.assignReqProcTime(value)
case "upstream_response_time":
err = l.assignUpsRespTime(value)
case "ssl_protocol":
err = l.assignSSLProto(value)
case "ssl_cipher":
err = l.assignSSLCipherSuite(value)
default:
err = l.assignCustom(field, value)
}
if err != nil {
err = fmt.Errorf("assign '%s': %w", field, err)
}
return err
}
const hyphen = "-"
func (l *logLine) assignVhost(vhost string) error {
if vhost == hyphen {
return nil
}
// nginx $host and $http_host returns ipv6 in [], apache not
if idx := strings.IndexByte(vhost, ']'); idx > 0 {
vhost = vhost[1:idx]
}
l.vhost = vhost
return nil
}
func (l *logLine) assignPort(port string) error {
if port == hyphen {
return nil
}
if !isPortValid(port) {
return fmt.Errorf("assign '%s' : %w", port, errBadPort)
}
l.port = port
return nil
}
func (l *logLine) assignVhostWithPort(vhostPort string) error {
if vhostPort == hyphen {
return nil
}
idx := strings.LastIndexByte(vhostPort, ':')
if idx == -1 {
return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
}
if err := l.assignPort(vhostPort[idx+1:]); err != nil {
return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
}
if err := l.assignVhost(vhostPort[0:idx]); err != nil {
return fmt.Errorf("assign '%s' : %w", vhostPort, errBadVhostPort)
}
return nil
}
func (l *logLine) assignReqScheme(scheme string) error {
if scheme == hyphen {
return nil
}
if !isSchemeValid(scheme) {
return fmt.Errorf("assign '%s' : %w", scheme, errBadReqScheme)
}
l.reqScheme = scheme
return nil
}
func (l *logLine) assignReqClient(client string) error {
if client == hyphen {
return nil
}
l.reqClient = client
return nil
}
func (l *logLine) assignRequest(request string) error {
if request == hyphen {
return nil
}
var first, last int
if first = strings.IndexByte(request, ' '); first < 0 {
return fmt.Errorf("assign '%s': %w", request, errBadRequest)
}
if last = strings.LastIndexByte(request, ' '); first == last {
return fmt.Errorf("assign '%s': %w", request, errBadRequest)
}
proto := request[last+1:]
url := request[first+1 : last]
method := request[0:first]
if err := l.assignReqMethod(method); err != nil {
return err
}
if err := l.assignReqURL(url); err != nil {
return err
}
return l.assignReqProto(proto)
}
func (l *logLine) assignReqMethod(method string) error {
if method == hyphen {
return nil
}
if !isReqMethodValid(method) {
return fmt.Errorf("assign '%s' : %w", method, errBadReqMethod)
}
l.reqMethod = method
return nil
}
func (l *logLine) assignReqURL(url string) error {
if url == hyphen {
return nil
}
if isEmptyString(url) {
return fmt.Errorf("assign '%s' : %w", url, errBadReqURL)
}
l.reqURL = url
return nil
}
func (l *logLine) assignReqProto(proto string) error {
if proto == hyphen {
return nil
}
if !isReqProtoValid(proto) {
return fmt.Errorf("assign '%s': %w", proto, errBadReqProto)
}
l.reqProto = proto[5:]
return nil
}
func (l *logLine) assignRespCode(status string) error {
if status == hyphen {
return nil
}
v, err := strconv.Atoi(status)
if err != nil || !isRespCodeValid(v) {
return fmt.Errorf("assign '%s': %w", status, errBadRespCode)
}
l.respCode = v
return nil
}
func (l *logLine) assignReqSize(size string) error {
// apache: can be "-" according web_log py regexp.
if size == hyphen {
l.reqSize = 0
return nil
}
v, err := strconv.Atoi(size)
if err != nil || !isSizeValid(v) {
return fmt.Errorf("assign '%s': %w", size, errBadReqSize)
}
l.reqSize = v
return nil
}
func (l *logLine) assignRespSize(size string) error {
// apache: %b. In CLF format, i.e. a '-' rather than a 0 when no bytes are sent.
if size == hyphen {
l.respSize = 0
return nil
}
v, err := strconv.Atoi(size)
if err != nil || !isSizeValid(v) {
return fmt.Errorf("assign '%s': %w", size, errBadRespSize)
}
l.respSize = v
return nil
}
func (l *logLine) assignReqProcTime(time string) error {
if time == hyphen {
return nil
}
if time == "0.000" {
l.reqProcTime = 0
return nil
}
v, err := strconv.ParseFloat(time, 64)
if err != nil || !isTimeValid(v) {
return fmt.Errorf("assign '%s': %w", time, errBadReqProcTime)
}
l.reqProcTime = v * timeMultiplier(time)
return nil
}
func isUpstreamTimeSeparator(r rune) bool { return r == ',' || r == ':' }
func (l *logLine) assignUpsRespTime(time string) error {
if time == hyphen {
return nil
}
// the upstream response time string can contain multiple values, separated
// by commas (in case the request was handled by multiple servers), or colons
// (in case the request passed between multiple server groups via an internal redirect)
// the individual values should be summed up to obtain the correct amount of time
// the request spent in upstream
var sum float64
for _, val := range strings.FieldsFunc(time, isUpstreamTimeSeparator) {
val = strings.TrimSpace(val)
v, err := strconv.ParseFloat(val, 64)
if err != nil || !isTimeValid(v) {
return fmt.Errorf("assign '%s': %w", time, errBadUpsRespTime)
}
sum += v
}
l.upsRespTime = sum * timeMultiplier(time)
return nil
}
func (l *logLine) assignSSLProto(proto string) error {
if proto == hyphen {
return nil
}
if !isSSLProtoValid(proto) {
return fmt.Errorf("assign '%s': %w", proto, errBadSSLProto)
}
l.sslProto = proto
return nil
}
func (l *logLine) assignSSLCipherSuite(cipher string) error {
if cipher == hyphen {
return nil
}
if strings.IndexByte(cipher, '-') <= 0 && strings.IndexByte(cipher, '_') <= 0 {
return fmt.Errorf("assign '%s': %w", cipher, errBadSSLCipherSuite)
}
l.sslCipherSuite = cipher
return nil
}
func (l *logLine) assignCustom(field, value string) error {
if len(l.custom.fields) == 0 || value == hyphen {
return nil
}
if _, ok := l.custom.fields[field]; ok {
l.custom.values = append(l.custom.values, customValue{name: field, value: value})
}
return nil
}
func (l *logLine) verify() error {
if l.empty() {
return fmt.Errorf("verify: %w", errEmptyLine)
}
if l.hasRespCode() && !l.isRespCodeValid() {
return fmt.Errorf("verify '%d': %w", l.respCode, errBadRespCode)
}
if l.hasVhost() && !l.isVhostValid() {
return fmt.Errorf("verify '%s': %w", l.vhost, errBadVhost)
}
if l.hasPort() && !l.isPortValid() {
return fmt.Errorf("verify '%s': %w", l.port, errBadPort)
}
if l.hasReqScheme() && !l.isSchemeValid() {
return fmt.Errorf("verify '%s': %w", l.reqScheme, errBadReqScheme)
}
if l.hasReqClient() && !l.isClientValid() {
return fmt.Errorf("verify '%s': %w", l.reqClient, errBadReqClient)
}
if l.hasReqMethod() && !l.isMethodValid() {
return fmt.Errorf("verify '%s': %w", l.reqMethod, errBadReqMethod)
}
if l.hasReqURL() && !l.isURLValid() {
return fmt.Errorf("verify '%s': %w", l.reqURL, errBadReqURL)
}
if l.hasReqProto() && !l.isProtoValid() {
return fmt.Errorf("verify '%s': %w", l.reqProto, errBadReqProto)
}
if l.hasReqSize() && !l.isReqSizeValid() {
return fmt.Errorf("verify '%d': %w", l.reqSize, errBadReqSize)
}
if l.hasRespSize() && !l.isRespSizeValid() {
return fmt.Errorf("verify '%d': %w", l.respSize, errBadRespSize)
}
if l.hasReqProcTime() && !l.isReqProcTimeValid() {
return fmt.Errorf("verify '%f': %w", l.reqProcTime, errBadReqProcTime)
}
if l.hasUpsRespTime() && !l.isUpsRespTimeValid() {
return fmt.Errorf("verify '%f': %w", l.upsRespTime, errBadUpsRespTime)
}
if l.hasSSLProto() && !l.isSSLProtoValid() {
return fmt.Errorf("verify '%s': %w", l.sslProto, errBadSSLProto)
}
if l.hasSSLCipherSuite() && !l.isSSLCipherSuiteValid() {
return fmt.Errorf("verify '%s': %w", l.sslCipherSuite, errBadSSLCipherSuite)
}
return nil
}
func (l *logLine) empty() bool { return !l.hasWebFields() && !l.hasCustomFields() }
func (l *logLine) hasCustomFields() bool { return len(l.custom.values) > 0 }
func (l *logLine) hasWebFields() bool { return l.web != emptyWebFields }
func (l *logLine) hasVhost() bool { return !isEmptyString(l.vhost) }
func (l *logLine) hasPort() bool { return !isEmptyString(l.port) }
func (l *logLine) hasReqScheme() bool { return !isEmptyString(l.reqScheme) }
func (l *logLine) hasReqClient() bool { return !isEmptyString(l.reqClient) }
func (l *logLine) hasReqMethod() bool { return !isEmptyString(l.reqMethod) }
func (l *logLine) hasReqURL() bool { return !isEmptyString(l.reqURL) }
func (l *logLine) hasReqProto() bool { return !isEmptyString(l.reqProto) }
func (l *logLine) hasRespCode() bool { return !isEmptyNumber(l.respCode) }
func (l *logLine) hasReqSize() bool { return !isEmptyNumber(l.reqSize) }
func (l *logLine) hasRespSize() bool { return !isEmptyNumber(l.respSize) }
func (l *logLine) hasReqProcTime() bool { return !isEmptyNumber(int(l.reqProcTime)) }
func (l *logLine) hasUpsRespTime() bool { return !isEmptyNumber(int(l.upsRespTime)) }
func (l *logLine) hasSSLProto() bool { return !isEmptyString(l.sslProto) }
func (l *logLine) hasSSLCipherSuite() bool { return !isEmptyString(l.sslCipherSuite) }
func (l *logLine) isVhostValid() bool { return reVhost.MatchString(l.vhost) }
func (l *logLine) isPortValid() bool { return isPortValid(l.port) }
func (l *logLine) isSchemeValid() bool { return isSchemeValid(l.reqScheme) }
func (l *logLine) isClientValid() bool { return reClient.MatchString(l.reqClient) }
func (l *logLine) isMethodValid() bool { return isReqMethodValid(l.reqMethod) }
func (l *logLine) isURLValid() bool { return !isEmptyString(l.reqURL) }
func (l *logLine) isProtoValid() bool { return isReqProtoVerValid(l.reqProto) }
func (l *logLine) isRespCodeValid() bool { return isRespCodeValid(l.respCode) }
func (l *logLine) isReqSizeValid() bool { return isSizeValid(l.reqSize) }
func (l *logLine) isRespSizeValid() bool { return isSizeValid(l.respSize) }
func (l *logLine) isReqProcTimeValid() bool { return isTimeValid(l.reqProcTime) }
func (l *logLine) isUpsRespTimeValid() bool { return isTimeValid(l.upsRespTime) }
func (l *logLine) isSSLProtoValid() bool { return isSSLProtoValid(l.sslProto) }
func (l *logLine) isSSLCipherSuiteValid() bool { return reCipherSuite.MatchString(l.sslCipherSuite) }
func (l *logLine) reset() {
l.web = emptyWebFields
l.custom.values = l.custom.values[:0]
}
var (
// TODO: reClient doesn't work with %h when HostnameLookups is On.
reVhost = regexp.MustCompile(`^[a-zA-Z0-9-:.]+$`)
reClient = regexp.MustCompile(`^([\da-f:.]+|localhost)$`)
reCipherSuite = regexp.MustCompile(`^[A-Z0-9-_]+$`) // openssl -v
)
var emptyWebFields = web{
vhost: emptyString,
port: emptyString,
reqScheme: emptyString,
reqClient: emptyString,
reqMethod: emptyString,
reqURL: emptyString,
reqProto: emptyString,
reqSize: emptyNumber,
reqProcTime: emptyNumber,
respCode: emptyNumber,
respSize: emptyNumber,
upsRespTime: emptyNumber,
sslProto: emptyString,
sslCipherSuite: emptyString,
}
const (
emptyString = "__empty_string__"
emptyNumber = -9999
)
func isEmptyString(s string) bool {
return s == emptyString || s == ""
}
func isEmptyNumber(n int) bool {
return n == emptyNumber
}
func isReqMethodValid(method string) bool {
// https://www.iana.org/assignments/http-methods/http-methods.xhtml
switch method {
case "GET",
"ACL",
"BASELINE-CONTROL",
"BIND",
"CHECKIN",
"CHECKOUT",
"CONNECT",
"COPY",
"DELETE",
"HEAD",
"LABEL",
"LINK",
"LOCK",
"MERGE",
"MKACTIVITY",
"MKCALENDAR",
"MKCOL",
"MKREDIRECTREF",
"MKWORKSPACE",
"MOVE",
"OPTIONS",
"ORDERPATCH",
"PATCH",
"POST",
"PRI",
"PROPFIND",
"PROPPATCH",
"PURGE", // not a standardized HTTP method
"PUT",
"REBIND",
"REPORT",
"SEARCH",
"TRACE",
"UNBIND",
"UNCHECKOUT",
"UNLINK",
"UNLOCK",
"UPDATE",
"UPDATEREDIRECTREF":
return true
}
return false
}
func isReqProtoValid(proto string) bool {
return len(proto) >= 6 && proto[:5] == "HTTP/" && isReqProtoVerValid(proto[5:])
}
func isReqProtoVerValid(version string) bool {
switch version {
case "1.1", "1", "1.0", "2", "2.0", "3", "3.0":
return true
}
return false
}
func isPortValid(port string) bool {
v, err := strconv.Atoi(port)
return err == nil && v >= 80 && v <= 49151
}
func isSchemeValid(scheme string) bool {
return scheme == "http" || scheme == "https"
}
func isRespCodeValid(code int) bool {
// rfc7231
// Informational responses (100–199),
// Successful responses (200–299),
// Redirects (300–399),
// Client errors (400–499),
// Server errors (500–599).
return code >= 100 && code <= 600
}
func isSizeValid(size int) bool {
return size >= 0
}
func isTimeValid(time float64) bool {
return time >= 0
}
func isSSLProtoValid(proto string) bool {
if proto == "TLSv1.2" {
return true
}
switch proto {
case "TLSv1.3", "SSLv2", "SSLv3", "TLSv1", "TLSv1.1":
return true
}
return false
}
func timeMultiplier(time string) float64 {
// TODO: Change code to detect and modify properly IIS time (in milliseconds)
// Convert to microseconds:
// - nginx time is in seconds with a milliseconds' resolution.
if strings.IndexByte(time, '.') > 0 {
return 1e6
}
// - apache time is in microseconds.
return 1
}