xmidt-org/caduceus

View on GitHub
.release/docker/caduceus_spruce.yaml

Summary

Maintainability
Test Coverage
# SPDX-FileCopyrightText: 2023 Comcast Cable Communications Management, LLC
# SPDX-License-Identifier: Apache-2.0
---
# fqdn is the unique fully-qualified-domain-name of the server.  spruce
# resolves it from $HOSTNAME and falls back to "caduceus" when that
# variable is not set.
# (Optional)
fqdn: (( grab $HOSTNAME || "caduceus" ))

# server is the host name of this instance.  Within this file it is
# referenced by the consul registration under service.consul.registrations
# (id, address, checkID and the health check URL).  spruce resolves it from
# $HOSTNAME and falls back to "caduceus" when that variable is not set.
# NOTE(review): whether caduceus itself reads this key is not evident from
# this file - confirm.
# (Optional)
server: (( grab $HOSTNAME || "caduceus" ))

########################################
#   Labeling/Tracing via HTTP Headers Configuration
########################################

# Provides this build number to the X-Caduceus-Build header for
# showing machine version information.  The build number SHOULD
# match the scheme `version-build` but there is not a strict requirement.
# spruce resolves it from $BUILD and falls back to "unknown" when that
# variable is not set.
# (Optional)
build: (( grab $BUILD || "unknown" ))

# Provides the region information to the X-Caduceus-Region header
# for showing what region this machine is located in.  The region
# is arbitrary and optional.  Unlike the neighboring values it is a fixed
# literal in this file rather than a spruce lookup of an environment
# variable.
# (Optional)
region: "east"

# Provides the flavor information to the X-Caduceus-Flavor header
# for showing what flavor this machine is associated with.  The flavor
# is arbitrary and optional.  spruce resolves it from $FLAVOR and falls
# back to "mint" when that variable is not set.  It is also reused for the
# "flavor=" tag of the consul registration further down.
# (Optional)
flavor: (( grab $FLAVOR || "mint" ))

# stage is used for doing complex spruce manipulation.  Changing it only
# affects spruce and the values that reference it (in this file, the
# "stage=" tag of the consul registration).  spruce resolves it from $STAGE
# and falls back to "dev" when that variable is not set.
stage: (( grab $STAGE || "dev" ))

########################################
#   primary endpoint Configuration
########################################

# primary defines the details needed for the primary endpoint.  The
# primary endpoint accepts the events from talaria (typically).
primary:
  # address provides the port number for the endpoint to bind to.
  # ":443" is ideal, but may require some special handling due to it being
  # a reserved (by the kernel) port.  The port used here (6000) is the same
  # one advertised by the consul registration under
  # service.consul.registrations.
  address: ":6000"
  # HTTPS/TLS
  #
  # certificateFile provides the public key and CA chain in PEM format if
  # TLS is used.  Note: the certificate needs to match the fqdn for clients
  # to accept without issue.
  #
  # keyFile provides the private key that matches the certificateFile.
  #
  # Both keys are commented out in this file.
  # (Optional)
  # certificateFile: "/etc/caduceus/public.pem"
  # keyFile: "/etc/caduceus/private.pem"

########################################
#   health endpoint Configuration
########################################

# health defines the details needed for the health check endpoint.  The
# health check endpoint is generally used by services (like AWS Route53
# or consul) to determine if this particular machine is healthy or not.
health:
  # address provides the port number for the endpoint to bind to.
  # ":80" is ideal, but may require some special handling due to it being
  # a reserved (by the kernel) port.  The port used here (6001) is the one
  # the consul health check URL under service.consul.registrations targets,
  # so keep the two in sync.
  address: ":6001"

  # logInterval appears to be present from before we had formal metrics.
  # It is kept only as a commented-out example.
  # (Deprecated)
  # logInterval: "60s"
  # options appears to be present from before we had formal metrics.
  # It is kept only as a commented-out example.
  # (Deprecated)
  # options:
  #  - "PayloadsOverZero"
  #  - "PayloadsOverHundred"
  #  - "PayloadsOverThousand"
  #  - "PayloadsOverTenThousand"

########################################
#   Debugging/pprof Configuration
########################################

# pprof defines the details needed for the pprof debug endpoint.
# (Optional)
pprof:
  # address provides the port number for the endpoint to bind to.  This
  # file uses 6002, the port after the primary (6000) and health (6001)
  # endpoints.
  address: ":6002"

########################################
#   Metrics Configuration
########################################

# metric defines the details needed for the prometheus metrics endpoint
# (Optional)
metric:
  # address provides the port number for the endpoint to bind to.  This
  # file binds 6003, the port after the primary (6000), health (6001) and
  # pprof (6002) endpoints.  You may use any port you wish, as long as it
  # does not conflict with another endpoint.
  address: ":6003"

  # metricsOptions provides the details needed to configure the prometheus
  # metric data.  Metrics generally have the form:
  #
  # {namespace}_{subsystem}_{metric}
  #
  # so with the values below, your metrics are prefixed like this:
  #
  # xmidt_caduceus_{metric}
  #
  # NOTE(review): the top-level `touchstone` section repeats the same
  # namespace/subsystem pair; confirm which section the running version
  # reads and keep the two in agreement.
  #
  # (Optional)
  metricsOptions:
    # namespace is the namespace of the metrics provided
    # (Optional)
    namespace: "xmidt"
    # subsystem is the subsystem of the metrics provided
    # (Optional)
    subsystem: "caduceus"

# touchstone supplies the default prometheus namespace and subsystem.
# NOTE(review): these values repeat metric.metricsOptions above; confirm
# which section the running version reads and keep the two in agreement.
touchstone:
  # defaultNamespace is the prometheus namespace to apply when a metric has
  # no namespace
  defaultNamespace: "xmidt"
  # defaultSubsystem is the prometheus subsystem to apply when a metric has
  # no subsystem
  defaultSubsystem: "caduceus"

########################################
#   Service Discovery Configuration
########################################

# service holds everything caduceus needs to talk to the consul cluster for
# service discovery (consul is the only backend supported at present).
# Today the registration is consumed by Prometheus to find machines to
# monitor; in the not-too-distant future talaria is expected to use it to
# load balance across all caduceus machines instead of relying on DNS.
# (Optional)
service:
  # consul sets up the consul library inside caduceus so that it talks to
  # the local service discovery agent.
  consul:
    # client describes the connection to the local consul agent (the one on
    # the same VM/container).
    client:
      # address of the local consul agent; taken from $CONSUL_HOST, falling
      # back to "consul:8500" when that variable is not set.
      address: (( grab $CONSUL_HOST || "consul:8500" ))
      # scheme the consul library uses when contacting the local agent.
      scheme: "http"
      # waitTime is TBD
      waitTime: "30s"

    # disableGenerateID is TBD
    disableGenerateID: true

    # registrations lists the services caduceus registers with consul.
    # Fields of each entry:
    #
    #     id      - the VM/container instance name registered with consul
    #     name    - the name of service being registered
    #     tags    - a list of tags to associate with this registration
    #     address - the mechanism to reach the service (generally unique fqdn)
    #     scheme  - the scheme to reach the service with
    #     port    - the port to reach the service at
    #     checks  - the list of checks to perform to determine if the service
    #               is available/healthy
    #         checkID                        - TBD
    #         ttl                            - how long the check is valid for
    #                                          (not used by the check below)
    #         http                           - the URL polled by the check; it
    #                                          points at the health endpoint
    #                                          port configured above
    #         interval                       - presumably how often the http
    #                                          check runs - TODO confirm
    #         deregisterCriticalServiceAfter - the duration to wait before the
    #                                          service is removed due to check
    #                                          failures
    #
    # The id, address, checkID and http values are derived from the
    # top-level `server` value; the tags come from `stage` and `flavor`.
    registrations:
      - id: (( grab server ))
        name: "caduceus"
        tags:
          - (( concat "stage=" stage ))
          - (( concat "flavor=" flavor ))
        address: (( concat "http://" server ))
        scheme: "http"
        port: 6000
        checks:
          - checkID: (( concat server ":http" ))
            http: (( concat "http://" server ":6001/health" ))
            interval: "30s"
            deregisterCriticalServiceAfter: "70s"

########################################
#   Logging Related Configuration
########################################

# log configures the logging subsystem details
# NOTE(review): a top-level `zap` section further down also configures
# logging; confirm which of the two the running version reads.
log:
  # file is the name of the most recent log file.  If set to "stdout" this
  # will log to os.Stdout.
  # (Optional) defaults to os.TempDir()
  file: "stdout"

  # level is the logging level to use - INFO, DEBUG, WARN, ERROR
  # spruce resolves it from $LOG_LEVEL and falls back to "DEBUG" when that
  # variable is not set.
  # (Optional) defaults to ERROR
  level: (( grab $LOG_LEVEL || "DEBUG" ))

  # maxsize is the maximum file size in MB
  # (Optional) defaults to max 100MB
  maxsize: 50

  # maxage is the maximum number of days to retain old log files
  # (Optional) defaults to ignore age limit (0)
  maxage: 30

  # maxbackups is the maximum number of old log files to retain
  # (Optional) defaults to retain all (0)
  maxbackups: 10

  # json is a flag indicating whether JSON logging output should be used.
  # (Optional) defaults to false
  json: true

# zap configures the zap logger.
# NOTE(review): the top-level `log` section above also configures logging;
# confirm which of the two the running version reads.
zap:
  # outputPaths lists the URLs or file paths that logging output is
  # written to.
  outputPaths:
    - "stdout"
    # - /var/log/caduceus/caduceus.log

  # level is the minimum enabled logging level.  It is a dynamic level:
  # calling Config.Level.SetLevel atomically changes the log level of every
  # logger descended from this config.
  level: "debug"

  # errorOutputPaths lists the URLs or file paths that the logger's own
  # internal errors are written to.
  errorOutputPaths:
    - "stderr"
    # - /var/log/caduceus/caduceus.log

  # encoderConfig sets options for the chosen encoder.  See
  # zapcore.EncoderConfig for details.
  encoderConfig:
    messageKey: "message"
    # NOTE(review): the level is emitted under the field name "key";
    # confirm that "level" was not the intended field name.
    levelKey: "key"
    levelEncoder: "lowercase"

  # encoding selects the logger's encoding.  Valid values are "json" and
  # "console", as well as any third-party encodings registered via
  # RegisterEncoder.
  encoding: "json"

########################################
#   Authorization Related Configuration
########################################

# Any combination of these configurations may be used for authorization.
# If ANY match, the request goes onwards.

# jwtValidator provides the details about where to get the keys for JWT
# kid values and their associated information (expiration, etc) for JWTs
# used as authorization.
# (Optional)
jwtValidator:
  Config:
    Resolve:
      # Template is the URL template used to fetch a key.  spruce resolves
      # it from $THEMIS_ENDPOINT and falls back to the themis URL below when
      # that variable is not set.
      # NOTE(review): presumably {keyID} is replaced with the JWT's kid -
      # confirm against the key resolver.
      Template: (( grab $THEMIS_ENDPOINT || "http://themis:6500/keys/{keyID}" ))

# authHeader provides the list of basic auth headers that caduceus will accept
# as authorization.  The single entry is resolved by spruce from
# $AUTH_HEADER.
# NOTE: the fallback value is the base64 encoding of "user:pass".  It is a
# well-known placeholder credential; set $AUTH_HEADER for anything other
# than local development.
# (Optional)
authHeader:
  - (( grab $AUTH_HEADER || "dXNlcjpwYXNz" ))

# authToken is a helper value for spruce: webhook.BasicClientConfig.auth.basic
# concatenates it after "Basic " to build the argus basic auth header.
# spruce resolves it from $AUTH_TOKEN.
# NOTE: the fallback value is the base64 encoding of "user:pass".  It is a
# well-known placeholder credential; set $AUTH_TOKEN for anything other
# than local development.
authToken: (( grab $AUTH_TOKEN || "dXNlcjpwYXNz" ))

# capabilityCheck provides the details needed for checking an incoming JWT's
# capabilities.  If the type of check isn't provided, no checking is done.  The
# type can be "monitor" or "enforce".  If it is empty or a different value, no
# checking is done.  If "monitor" is provided, the capabilities are checked but
# the request isn't rejected when there isn't a valid capability for the
# request. Instead, a message is logged.  When "enforce" is provided, a request
# that doesn't have the needed capability is rejected.
#
# The capability is expected to have the format:
#
# {prefix}{endpoint}:{method}
#
# The prefix can be a regular expression.  If it's empty, no capability check
# is done.  The endpoint is a regular expression that should match the endpoint
# the request was sent to. The method is usually the method of the request, such as
# GET.  The accept all method is a catchall string that indicates the capability
# is approved for all methods.
# (Optional)
# capabilityCheck:
#   # type provides the mode for capability checking.
#   type: "enforce"
#   # prefix provides the regex to match the capability before the endpoint.
#   prefix: "prefix Here"
#   # acceptAllMethod provides a way to have a capability that allows all
#   # methods for a specific endpoint.
#   acceptAllMethod: "all"
#   # endpointBuckets provides regular expressions to use against the request
#   # endpoint in order to group requests for a metric label.
#   endpointBuckets:
#     - "hook\\b"
#     - "hooks\\b"
#     - "notify\\b"

########################################
#   Webhook Related Configuration
########################################
# Argus Config for storing the webhook information
webhook:
  # JWTParserType establishes which parser type will be used by the JWT token
  # acquirer used by Argus. Options include 'simple' and 'raw'.
  # Simple: parser assumes token payloads have the following structure: https://github.com/xmidt-org/bascule/blob/c011b128d6b95fa8358228535c63d1945347adaa/acquire/bearer.go#L77
  # Raw: parser assumes all of the token payload == JWT token
  # (Optional). Defaults to 'simple'.  This file falls back to "raw" when
  # $WEBHOOK_JWT_PARSER_TYPE is not set.
  JWTParserType: (( grab $WEBHOOK_JWT_PARSER_TYPE || "raw" ))
  BasicClientConfig:
    # listen is the subsection that configures the listening feature of the argus client
    # (Optional)
    listen:
      # pullInterval is how often to call argus to update the webhook structure.
      pullInterval: (( grab $ARGUS_PULL_INTERVAL || "5s" ))

    # bucket is where webhooks are stored and retrieved from.
    bucket: (( grab $ARGUS_BUCKET || "webhooks" ))

    # address is the location to talk to argus.
    address: (( grab $ARGUS_ENDPOINT || "http://argus:6600" ))

    # auth is the authentication method for argus.
    auth:
      # basic configures basic authentication for argus.
      # Must be of form: 'Basic xyz=='
      # Built here from the top-level `authToken` value.
      basic: (( concat "Basic " authToken ))

      # jwt configures jwt style authentication for argus.
      jwt:
        # requestHeaders are added to the request for the token.
        # (Optional)
        # requestHeaders:
        #   "": ""

        # authURL is the URL to access for the token.
        authURL: (( grab $ARGUS_ACQUIRE_JWT_URL || "http://themis:6501/issue" ))

        # timeout is how long the request to get the token will take before
        # timing out.
        timeout: (( grab $ARGUS_ACQUIRE_TIMEOUT || "1m" ))

        # buffer is the length of time before a token expires to get a new token.
        buffer: (( grab $ARGUS_ACQUIRE_BUFFER || "2m" ))
########################################
#   Delivery Pipeline Related Configuration
########################################

# (Deprecated)
# numWorkerThreads: 3000
# jobQueueSize: 6000

# sender holds the settings applied to each "sender", i.e. the delivery
# pipeline that services one unique webhook url endpoint.
sender:
  # numWorkersPerSender caps the number of concurrent outgoing HTTP client
  # requests to a single webhook url.  The cap applies to THIS server only;
  # multiply it by the number of caduceus machines to get the overall
  # maximum.
  numWorkersPerSender: 5000

  # queueSizePerSender is the deepest the sender's queue may grow (in
  # events) before the webhook is cut off because the delivery pipeline has
  # backed up.
  queueSizePerSender: 10000

  # cutOffPeriod is how long a webhook stays cut off once the delivery
  # pipeline backs up.  Every outstanding event is dropped, and so is every
  # new event destined for this webhook during the period.  The pause gives
  # the webhook server time to recover.
  cutOffPeriod: "10s"

  # linger is how long the delivery pipeline is kept after a webhook has
  # stopped being registered before it is torn down.
  linger: "180s"

  # (Deprecated)
  # clientTimeout: 60s

  # disableClientHostnameValidation offers a way around TLS validation
  # failures on HTTPS requests when events are sent to webhooks.
  # NOTE: Setting this to true opens up a potential man-in-the-middle
  # scenario between caduceus and a webhook.
  # (Optional) defaults to false
  disableClientHostnameValidation: false

  # deliveryRetries is the maximum number of delivery attempts caduceus
  # makes before it drops an event.
  deliveryRetries: 1

  # deliveryInterval is the pause after a failed delivery attempt before
  # the next attempt is made.
  deliveryInterval: "10ms"

  # responseHeaderTimeout is how long to wait for a response before giving
  # up and marking the delivery a failure.
  responseHeaderTimeout: "10s"

  # customPIDs is a custom list of allowed PartnerIDs used when a message
  # carries no partner IDs.  When the list is empty, a message without
  # partner IDs is not sent to any listeners while the partner ID check is
  # enforced.
  customPIDs: []

  # disablePartnerIDs dictates whether or not to enforce the partnerID
  # check.  Defaults to 'false'; this file sets it to true.
  disablePartnerIDs: true

# (Deprecated)
# profilerFrequency: 15
# profilerDuration: 15
# profilerQueueSize: 100
# totalIncomingPayloadSizeBuckets:
#   - 100
#   - 1000
#   - 10000
# perSourceIncomingPayloadSizeBuckets:
#   - 100
#   - 1000
#   - 10000

# tracing provides configuration for OpenTelemetry
tracing:
  # provider is the provider name. Currently, stdout, jaeger and zipkin are supported.
  # 'noop' can also be used as provider to explicitly disable tracing.
  # spruce resolves it from $TRACING_PROVIDER_NAME and falls back to "noop"
  # when that variable is not set.
  provider: (( grab $TRACING_PROVIDER_NAME || "noop" ))

  # skipTraceExport only applies when provider is stdout. Set skipTraceExport to true
  # so that trace information is not written to stdout.
  # skipTraceExport: true

  # endpoint is where trace information should be routed. Applies to zipkin and jaeger.
  # It is commented out here; uncomment it when selecting one of those providers.
  # endpoint: (( grab $TRACING_PROVIDER_ENDPOINT || "http://zipkin:9411/api/v2/spans" ))

# argusClientTimeout groups the timeouts applied to the Argus HTTP client.
# (Optional) By default, the values below will be used.
argusClientTimeout:
  # clientTimeout bounds every request made through this HTTP client.  It
  # covers connection time, any redirects, and reading the response body.
  clientTimeout: "50s"

  # netDialerTimeout is the longest the HTTP Client Dialer waits for a
  # connect to complete.
  netDialerTimeout: "5s"

# previousVersionSupport allows us to support two different major versions of
# the API at the same time from the same application.  When this is true,
# caduceus will support both "/v3" and "/v4" endpoints.  When false, only "/v4"
# endpoints will be supported.
# spruce resolves it from $PREV_VERSION_SUPPORT and falls back to true when
# that variable is not set.
# NOTE(review): confirm that a value supplied through the environment
# variable is read as a boolean rather than as a string.
previousVersionSupport: (( grab $PREV_VERSION_SUPPORT || true ))