// auto-discovery/cloud-aws/pkg/kubernetes/kubernetes.go
// SPDX-FileCopyrightText: the secureCodeBox authors
//
// SPDX-License-Identifier: Apache-2.0
package kubernetes
import (
"context"
"fmt"
"strings"
"github.com/go-logr/logr"
"github.com/secureCodeBox/secureCodeBox/auto-discovery/cloud-aws/pkg/config"
configv1 "github.com/secureCodeBox/secureCodeBox/auto-discovery/kubernetes/api/v1"
"github.com/secureCodeBox/secureCodeBox/auto-discovery/kubernetes/pkg/util"
executionv1 "github.com/secureCodeBox/secureCodeBox/operator/apis/execution/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/cli-runtime/pkg/genericclioptions"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// A notification to the Reconciler, that a monitored cloud resource changed
// Request is a notification to the Reconciler that a monitored cloud resource changed.
type Request struct {
// The current state the container has in its lifecycle.
// Values handled by the Reconciler: "RUNNING", "PENDING", "STOPPED" (anything else is an error).
State string
// Details about the running container instance
Container ContainerInfo
}
// Information about the monitored instance of a container
// ContainerInfo holds information about the monitored instance of a container.
type ContainerInfo struct {
// A unique way to identify this container to make sure the state can be tracked properly
Id string
// Container image reference (ImageInfo is declared elsewhere in this package)
Image ImageInfo
}
// Reconciler interface so that it can be mocked in tests
// CloudReconciler is the reconciler interface so that it can be mocked in tests.
type CloudReconciler interface {
// Reconcile is invoked for every state change of a monitored cloud resource.
Reconcile(ctx context.Context, req Request) error
}
// Kubernetes reconciler that updates ScheduledScans based on the Requests it receives
// CloudScanReconciler is a kubernetes reconciler that updates ScheduledScans based on the Requests it receives.
type CloudScanReconciler struct {
client.Client
// Config holds the auto-discovery settings, including the target namespace and scan configs.
Config *config.AutoDiscoveryConfig
Log logr.Logger
// Track which images are actually running and which instances use them
// Technically a HashMap to a HashSet but Go doesn't have sets
// Containers are tracked per image because each image only needs one scan
// To achieve this, RunningContainers maps the normalized image references to a "set" which
// contains the unique container instance IDs, scans are deleted when the last instance stops
// NOTE(review): map access is not synchronized — assumes Reconcile is called from a single goroutine; confirm with callers.
RunningContainers map[string]map[string]struct{}
}
// Values that can be referenced in templates when scans are created. Adapted from the kubernetes
// AutoDiscovery, but not all values offered there make sense here.
// ContainerAutoDiscoveryTemplateArgs holds the values that can be referenced in templates when
// scans are created. Adapted from the kubernetes AutoDiscovery, but not all values offered there
// make sense here.
type ContainerAutoDiscoveryTemplateArgs struct {
// Config is a copy of the full auto-discovery configuration.
Config config.AutoDiscoveryConfig
// ScanConfig is the specific scan configuration this scan is generated from.
ScanConfig configv1.ScanConfig
// Target describes the container instance the scan is created for.
Target ContainerInfo
// Image holds detailed information about the container image (ImageDetails is declared elsewhere in this package).
Image ImageDetails
// ImageID is the normalized image reference.
ImageID string
}
// Called with a Request if any monitored resource changed, updates kubernetes to match
// Reconcile is called with a Request whenever any monitored resource changed and
// updates the kubernetes state (ScheduledScans) to match.
// Returns an error for unknown container states or when updating kubernetes fails.
func (r *CloudScanReconciler) Reconcile(ctx context.Context, req Request) error {
	r.Log.V(1).Info("Received reconcile request", "State", req.State, "Image", req.Container.Image.reference())

	// Dispatch on the reported lifecycle state, combined with the information
	// we saved about this container.
	if req.State == "RUNNING" {
		// The container is alive: track it and make sure a ScheduledScan exists.
		return r.handleCreateRequest(ctx, req)
	}
	if req.State == "PENDING" || req.State == "STOPPED" {
		// The container is not running: the ScheduledScan may need to be deleted.
		return r.handleDeleteRequest(ctx, req)
	}
	return fmt.Errorf("unexpected container state: %s", req.State)
}
// Create a Reconciler with a new kubernetes connection
// NewReconciler creates a Reconciler backed by a freshly established kubernetes
// connection. Panics if the connection cannot be created.
func NewReconciler(cfg *config.AutoDiscoveryConfig, log logr.Logger) *CloudScanReconciler {
	kubeClient, kubeconfigNamespace, err := getClient(log)
	if err != nil {
		log.Error(err, "Unable to create Kubernetes client")
		panic(err)
	}

	// Fall back to the namespace from the kubeconfig when none was configured.
	if cfg.Kubernetes.Namespace == "" {
		cfg.Kubernetes.Namespace = kubeconfigNamespace
	}

	return NewReconcilerWith(kubeClient, cfg, log)
}
// Create a Reconciler with a provided kubernetes client
// NewReconcilerWith creates a Reconciler that uses the provided kubernetes client.
// The container-tracking map starts out empty.
func NewReconcilerWith(c client.Client, cfg *config.AutoDiscoveryConfig, log logr.Logger) *CloudScanReconciler {
	reconciler := &CloudScanReconciler{
		Client:            c,
		Config:            cfg,
		Log:               log,
		RunningContainers: map[string]map[string]struct{}{},
	}
	return reconciler
}
// Check requests for RUNNING containers and start scans if needed
// handleCreateRequest processes requests for RUNNING containers: it records the
// container instance in RunningContainers and creates one ScheduledScan per
// configured scan type. AlreadyExists errors are expected and ignored; the last
// unexpected error (if any) is returned.
func (r *CloudScanReconciler) handleCreateRequest(ctx context.Context, req Request) error {
	imageRef := req.Container.Image.reference()

	// Make sure there is at least an empty "set" for this image
	if r.RunningContainers[imageRef] == nil {
		r.RunningContainers[imageRef] = make(map[string]struct{})
	}
	// Add this container instance to the "set" for this image
	r.RunningContainers[imageRef][req.Container.Id] = struct{}{}

	// Create all configured scans
	var err error
	for _, scanConfig := range r.Config.Kubernetes.ScanConfigs {
		scan := getScheduledScanForRequest(req, r.Config, scanConfig)
		r.Log.V(1).Info("Creating ScheduledScan", "Name", scan.ObjectMeta.Name)
		// Always try to create the scan and later check if it might have existed already
		res, thiserr := r.createScheduledScan(ctx, scan)
		if thiserr != nil {
			// Avoid TOCTOU problems by checking the err instead of checking before if the scan exists
			if apierrors.IsAlreadyExists(thiserr) {
				r.Log.V(1).Info("ScheduledScan already exists, nothing to do")
			} else {
				// BUG FIX: previously this logged `err` (nil or a stale error from an
				// earlier iteration) instead of the error that actually occurred here.
				// What even is the AWS Monitor supposed to do with an error? Ignoring the message won't do
				// much anyway. Retry somehow?
				r.Log.Error(thiserr, "Unexpected error while trying to create ScheduledScan")
				err = thiserr
			}
		} else {
			r.Log.Info("Successfully created ScheduledScan", "Name", res.ObjectMeta.Name)
		}
	}
	return err
}
// Check requests for containers in other states but RUNNING and delete scans if needed
// handleDeleteRequest processes requests for containers in any state other than
// RUNNING. It forgets the container instance and, once the last instance of an
// image stops, deletes the ScheduledScans that were created for it.
// NotFound errors are expected and ignored; the last unexpected error is returned.
func (r *CloudScanReconciler) handleDeleteRequest(ctx context.Context, req Request) error {
	imageRef := req.Container.Image.reference()

	instances := r.RunningContainers[imageRef]
	if instances == nil {
		// We received a PENDING/STOPPED event but this container wasn't running before either
		r.Log.V(1).Info("Container was not running before, nothing to do")
		return nil
	}

	// Forget this instance; keep the scans while other instances still use the image.
	delete(instances, req.Container.Id)
	if len(instances) > 0 {
		r.Log.V(1).Info("There are still instances of this image running, keeping the ScheduledScan")
		return nil
	}

	// This was the last instance, so delete every configured ScheduledScan.
	var lastErr error
	for _, scanConfig := range r.Config.Kubernetes.ScanConfigs {
		name := getScanName(req, scanConfig.Name)
		r.Log.V(1).Info("Deleting ScheduledScan", "Name", name)
		deleteErr := r.deleteScheduledScan(ctx, name)
		if deleteErr == nil {
			r.Log.Info("Successfully deleted ScheduledScan", "Name", name)
			continue
		}
		if apierrors.IsNotFound(deleteErr) {
			// The scan being gone already is what we wanted anyway.
			r.Log.V(1).Info("ScheduledScan was already deleted, nothing to do")
			continue
		}
		// Same problem here, how should the AWS Monitor even handle other errors?
		r.Log.Error(deleteErr, "Unexpected error while trying to delete ScheduledScan")
		lastErr = deleteErr
	}
	return lastErr
}
// createScheduledScan places the given ScheduledScan into the configured
// namespace and submits it to the cluster. The (possibly mutated) scan is
// returned together with the result of the create call.
func (r *CloudScanReconciler) createScheduledScan(ctx context.Context, scheduledScan *executionv1.ScheduledScan) (*executionv1.ScheduledScan, error) {
	scheduledScan.ObjectMeta.Namespace = r.Config.Kubernetes.Namespace
	if err := r.Client.Create(ctx, scheduledScan); err != nil {
		return scheduledScan, err
	}
	return scheduledScan, nil
}
// deleteScheduledScan removes the ScheduledScan with the given name from the
// configured namespace. Only name and namespace are needed to address the object.
func (r *CloudScanReconciler) deleteScheduledScan(ctx context.Context, name string) error {
	var toDelete executionv1.ScheduledScan
	toDelete.ObjectMeta = metav1.ObjectMeta{
		Name:      name,
		Namespace: r.Config.Kubernetes.Namespace,
	}
	return r.Client.Delete(ctx, &toDelete)
}
// Generate a ScheduledScan based on the config and template values for a request the Reconciler received
// getScheduledScanForRequest generates a ScheduledScan based on the config and
// template values for a request the Reconciler received. Name, annotations,
// labels and the scan spec are all rendered from the same template arguments.
func getScheduledScanForRequest(req Request, cfg *config.AutoDiscoveryConfig, scanConfig configv1.ScanConfig) *executionv1.ScheduledScan {
	args := ContainerAutoDiscoveryTemplateArgs{
		Config:     *cfg,
		ScanConfig: scanConfig,
		Target:     req.Container,
		Image:      req.Container.Image.details(),
		ImageID:    req.Container.Image.reference(),
	}

	return &executionv1.ScheduledScan{
		ObjectMeta: metav1.ObjectMeta{
			Name:        getScanName(req, scanConfig.Name),
			Namespace:   cfg.Kubernetes.Namespace,
			Annotations: getScanAnnotations(scanConfig, args),
			Labels:      getScanLabels(scanConfig, args),
		},
		Spec: util.GenerateScanSpec(scanConfig, args),
	}
}
// getScanName builds the ScheduledScan name for a request and scan-config name.
// Adapted from the kubernetes container autodiscovery; the result looks like
// _appName_-_customScanName_-at-_imageHash_, e.g. nginx-myTrivyScan-at-0123456789.
func getScanName(req Request, name string) string {
	const (
		maxAppLength  = 20 // cutoff for the app-name prefix
		maxNameLength = 62 // stay within kubernetes name length limits
	)

	appName := req.Container.Image.shortName()
	if len(appName) > maxAppLength {
		appName = appName[:maxAppLength]
	}

	// Map characters that are illegal in kubernetes names to "-".
	sanitize := strings.NewReplacer(".", "-", "/", "-")
	result := sanitize.Replace(fmt.Sprintf("%s-%s-at-%s", appName, name, req.Container.Image.hash()))

	if len(result) > maxNameLength {
		result = result[:maxNameLength]
	}
	return result
}
// Templating helper function taken from the kubernetes AutoDiscovery
// getScanAnnotations renders the configured annotation templates for a scan.
// Templating helper function taken from the kubernetes AutoDiscovery.
func getScanAnnotations(scanConfig configv1.ScanConfig, templateArgs ContainerAutoDiscoveryTemplateArgs) map[string]string {
	annotations := util.ParseMapTemplate(templateArgs, scanConfig.Annotations)
	return annotations
}
// Templating helper function taken from the kubernetes AutoDiscovery
// getScanLabels renders the configured label templates for a scan and adds the
// managed-by marker label. Templating helper function taken from the kubernetes
// AutoDiscovery.
func getScanLabels(scanConfig configv1.ScanConfig, templateArgs ContainerAutoDiscoveryTemplateArgs) map[string]string {
	generatedLabels := util.ParseMapTemplate(templateArgs, scanConfig.Labels)
	if generatedLabels == nil {
		// Guard against a nil result (e.g. when no labels are configured):
		// writing to a nil map below would panic.
		generatedLabels = make(map[string]string, 1)
	}
	generatedLabels["app.kubernetes.io/managed-by"] = "securecodebox-autodiscovery"
	return generatedLabels
}
// Connect to kubernetes
// getClient connects to the kubernetes cluster using the standard kubeconfig
// discovery. It returns a client with the execution/v1 types (ScheduledScan)
// registered, plus the default namespace read from the kubeconfig.
func getClient(log logr.Logger) (client.Client, string, error) {
	log.Info("Connecting to Kubernetes cluster...")

	kubeconfigArgs := genericclioptions.NewConfigFlags(false)
	cnfLoader := kubeconfigArgs.ToRawKubeConfigLoader()

	cnf, err := cnfLoader.ClientConfig()
	if err != nil {
		// Wrap instead of discarding the underlying error so the root cause stays visible.
		return nil, "", fmt.Errorf("failed to generate config from kubeconfig: %w", err)
	}

	namespace, _, err := cnfLoader.Namespace()
	if err != nil {
		return nil, "", fmt.Errorf("failed to read namespace from kubeconfig: %w", err)
	}

	// Only the execution/v1 scheme is needed; AddToScheme cannot fail here
	// except for programmer error, hence utilruntime.Must.
	scheme := runtime.NewScheme()
	utilruntime.Must(executionv1.AddToScheme(scheme))

	// Renamed from `client` to avoid shadowing the controller-runtime client package.
	k8sClient, err := client.New(cnf, client.Options{Scheme: scheme})
	if err != nil {
		return nil, "", err
	}
	return k8sClient, namespace, nil
}