// docker/swarmkit
//
// View on GitHub
// api/types.proto
//
// Summary
//
// Maintainability
// Test Coverage
syntax = "proto3";

package docker.swarmkit.v1;

import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";
import "gogoproto/gogo.proto";

// This file contains types that are common to objects and spec or that are not
// considered first-class within the cluster object-model.

// Version tracks the last time an object in the store was updated.
message Version {
    // Index is the numeric version value recorded for the object.
    // NOTE(review): presumably the store index of the most recent update;
    // confirm against the store implementation.
    uint64 index = 1;
}

// IndexEntry is a single key/value pair. Annotations.indices carries a list
// of these entries to provide keys and values for indexing an object.
message IndexEntry {
    // Key is the index key.
    string key = 1;
    // Val is the value stored under the key.
    string val = 2;
}

// Annotations provide useful information to identify API objects. They are
// common to all API specs.
message Annotations {
    // Name is the name given to the annotated object.
    string name = 1;
    // Labels are string key/value pairs attached to the object.
    map<string, string> labels = 2;

    // NOTE(review): field number 3 is not used in this message. If it once
    // belonged to a removed field it should be declared `reserved` — confirm
    // against the file history.

    // Indices provides keys and values for indexing this object.
    // A single key may have multiple values.
    repeated IndexEntry indices = 4 [(gogoproto.nullable) = false];
}

// NamedGenericResource represents a "user defined" resource which is defined
// as a string.
// "Kind" is used to describe the Kind of a resource (e.g: "GPU", "FPGA", "SSD", ...)
// Value is used to identify the resource (GPU="UUID-1", FPGA="/dev/sdb5", ...)
message NamedGenericResource {
    // Kind names the category of the resource, e.g. "GPU".
    string kind = 1;
    // Value is the string that identifies one specific resource of that
    // kind, e.g. "UUID-1".
    string value = 2;
}

// DiscreteGenericResource represents a "user defined" resource which is defined
// as an integer
// "Kind" is used to describe the Kind of a resource (e.g: "GPU", "FPGA", "SSD", ...)
// Value is used to count the resource (SSD=5, HDD=3, ...)
message DiscreteGenericResource {
    // Kind names the category of the resource, e.g. "SSD".
    string kind = 1;
    // Value is the number of resources of that kind, e.g. 5.
    int64 value = 2;
}

// GenericResource represents a "user defined" resource which can
// be either an integer (e.g: SSD=3) or a string (e.g: SSD=sda1)
message GenericResource {
    // At most one of the two representations is set at a time (oneof).
    oneof resource {
        // Resource identified by a string value.
        NamedGenericResource named_resource_spec = 1;
        // Resource counted by an integer value.
        DiscreteGenericResource discrete_resource_spec = 2;
    }
}

// ResourceType enumerates kinds of resources known to the API.
// NOTE(review): the messages that use this enum are not visible in this part
// of the file; confirm the intended use before relying on this description.
enum ResourceType {
    // TASK is the zero value, so it is also what an unset field decodes to.
    TASK = 0;
    SECRET = 1;
    CONFIG = 2;
    VOLUME = 3;
}

// Resources describes an amount of CPU, memory and user-defined generic
// resources.
message Resources {
    // Amount of CPUs (e.g. 2000000000 = 2 CPU cores)
    // i.e. the value is expressed in billionths of a CPU core.
    int64 nano_cpus = 1 [(gogoproto.customname) = "NanoCPUs"];

    // Amount of memory in bytes.
    int64 memory_bytes = 2;

    // User specified resource (e.g: bananas=2;apple={red,yellow,green})
    // Each entry is either a counted or a named GenericResource.
    repeated GenericResource generic = 3;
}

// ResourceRequirements groups resource limits and reservations together
// with the swap-related memory settings.
message ResourceRequirements {
    // Limits is the set of resource limits.
    Resources limits = 1;
    // Reservations is the set of resource reservations.
    Resources reservations = 2;

    // Amount of swap in bytes - can only be used together with a memory limit
    // -1 means unlimited
    // a null pointer indicates that the default behaviour of granting twice
    // the memory is maintained
    // (a wrapper type is used so that "unset" can be told apart from 0).
    google.protobuf.Int64Value swap_bytes = 3;

    // Tune container memory swappiness (0 to 100) - if not specified, defaults
    // to the container OS's default - generally 60, or the value predefined in
    // the image; set to -1 to unset a previously set value
    google.protobuf.Int64Value memory_swappiness = 4;
}

// Platform identifies a platform by CPU architecture and operating system.
message Platform {
    // Architecture (e.g. x86_64)
    string architecture = 1;

    // Operating System (e.g. linux)
    // The generated Go field is named "OS".
    string os = 2 [(gogoproto.customname) = "OS"];
}

// PluginDescription describes an engine plugin by its type and name.
message PluginDescription {
    // Type of plugin. Canonical values for existing types are
    // Volume, Network, and Authorization. More types could be
    // supported in the future.
    // The type is a free-form string rather than an enum.
    string type = 1;

    // Name of the plugin
    string name = 2;
}

// EngineDescription describes the Docker Engine running on a node: its
// version, its labels and its plugins.
message EngineDescription {
    // Docker daemon version running on the node.
    string engine_version = 1;

    // Labels attached to the engine.
    map<string, string> labels = 2;

    // Volume, Network, and Auth plugins
    // Generated as a slice of values (not pointers) because nullable is false.
    repeated PluginDescription plugins = 3 [(gogoproto.nullable) = false];
}

// NodeDescription collects descriptive information about a node: hostname,
// platform, total resources, engine, TLS setup, FIPS mode and CSI plugins.
message NodeDescription {
    // Hostname of the node as reported by the agent.
    // This is different from spec.meta.name which is user-defined.
    string hostname = 1;

    // Platform of the node.
    Platform platform = 2;

    // Total resources on the node.
    Resources resources = 3;

    // Information about the Docker Engine on the node.
    EngineDescription engine = 4;

    // Information on the node's TLS setup
    NodeTLSInfo tls_info = 5 [(gogoproto.customname) = "TLSInfo"];

    // FIPS indicates whether the node has FIPS-enabled
    bool fips = 6 [(gogoproto.customname) = "FIPS"];

    // CSIInfo holds the CSI plugin information for the node, with one
    // NodeCSIInfo entry per CSI plugin present on it.
    repeated NodeCSIInfo csi_info = 7 [(gogoproto.customname) = "CSIInfo"];
}

// NodeTLSInfo describes a node's TLS setup: the root certs it trusts and the
// issuer of its current TLS certificate.
message NodeTLSInfo {
    // Information about which root certs the node trusts
    // NOTE(review): the byte encoding (e.g. PEM vs DER) is not stated here;
    // confirm with the producer of this message.
    bytes trust_root = 1;

    // Information about the node's current TLS certificate
    // The two fields below carry the subject and the public key of the
    // certificate's issuer respectively.
    bytes cert_issuer_subject = 2;
    bytes cert_issuer_public_key = 3;
}

// NodeCSIInfo represents information about a Node returned by calling the
// NodeGetInfo RPC on the CSI plugin present on the node. There is a separate
// NodeCSIInfo object for each CSI plugin present.
message NodeCSIInfo {

    // PluginName is the name of the CSI plugin.
    string plugin_name = 1;

    // NodeID is the ID of the node as reported by the CSI plugin. This will be
    // different from the swarmkit node ID.
    string node_id = 2;

    // MaxVolumesPerNode is the maximum number of volumes that may be published
    // to this node.
    // NOTE(review): the meaning of 0 (unset vs. no volumes allowed) is not
    // stated here; confirm against the CSI NodeGetInfo semantics.
    int64 max_volumes_per_node = 3;

    // AccessibleTopology indicates the location of this node in the CSI plugin's
    // topology
    // (the Topology message is declared outside this part of the file).
    Topology accessible_topology = 4;
}

// RaftMemberStatus reports the status of a member of the raft cluster:
// whether it is the leader, how reachable it is, and an accompanying message.
message RaftMemberStatus {
    // Leader is set when this member is the raft leader.
    bool leader = 1;

    // Reachability describes whether other raft cluster members can contact
    // the node.
    enum Reachability {
        // Unknown indicates that the manager state cannot be resolved
        UNKNOWN = 0;

        // Unreachable indicates that the node cannot be contacted by other
        // raft cluster members.
        UNREACHABLE = 1;

        // Reachable indicates that the node is healthy and reachable
        // by other members.
        REACHABLE = 2;
    }

    // Reachability is the current reachability of this member.
    Reachability reachability = 2;
    // Message is a free-form string accompanying the status.
    // NOTE(review): presumably a human-readable explanation — confirm.
    string message = 3;
}

// NodeStatus reports the state of a node, an accompanying message, and the
// node's address as seen by the manager.
message NodeStatus {
    // TODO(aluzzardi) These should be using `gogoproto.enumvalue_customname`.
    enum State {
        // Unknown indicates the node state cannot be resolved.
        UNKNOWN = 0;

        // Down indicates the node is down.
        DOWN = 1;

        // Ready indicates the node is ready to accept tasks.
        READY = 2;

        // Disconnected indicates the node is currently trying to find new manager.
        DISCONNECTED = 3;
    }

    // State is the current state of the node.
    State state = 1;
    // Message is a free-form string accompanying the state.
    // NOTE(review): presumably a human-readable explanation — confirm.
    string message = 2;
    // Addr is the node's IP address as observed by the manager
    string addr = 3;
}

// Image identifies a docker image by reference.
message Image {
    // reference is a docker image reference. This can include a repository, tag
    // or be fully qualified with a digest. The format is specified in the
    // distribution/reference package.
    string reference = 1;
}

// Mount describes volume mounts for a container.
//
// The Mount type follows the structure of the mount syscall, including a type,
// source, target. Top-level flags, such as writable, are common to all kinds
// of mounts, where we also provide options that are specific to a type of
// mount. This corresponds to flags and data, respectively, in the syscall.
message Mount {
    // Type enumerates the kinds of mount. The Go names are set explicitly
    // (MountType, MountTypeBind, ...) and the default enum prefix is disabled.
    // BIND is the zero value and therefore the default.
    enum Type {
        option (gogoproto.goproto_enum_prefix) = false;
        option (gogoproto.enum_customname) = "MountType";

        BIND = 0 [(gogoproto.enumvalue_customname) = "MountTypeBind"]; // Bind mount host dir
        VOLUME = 1 [(gogoproto.enumvalue_customname) = "MountTypeVolume"];  // Remote storage volumes
        TMPFS = 2 [(gogoproto.enumvalue_customname) = "MountTypeTmpfs"]; // Mount a tmpfs
        NPIPE = 3 [(gogoproto.enumvalue_customname) = "MountTypeNamedPipe"]; // Windows named pipes
        CLUSTER = 4 [(gogoproto.enumvalue_customname) = "MountTypeCluster"]; // CSI volume
    }

    // Type defines the nature of the mount.
    Type type = 1;

    // Source specifies the name of the mount. Depending on mount type, this
    // may be a volume name or a host path, or even ignored.
    //
    // For CSI type mounts, the source is either the name of the volume or the
    // name of the volume group. To specify a volume group, the source should be
    // prefixed with "group:", as in "group:groupname"
    string source = 2;

    // Target path in container
    string target = 3;

    // ReadOnly should be set to true if the mount should not be writable.
    bool readonly = 4 [(gogoproto.customname) = "ReadOnly"];

    // Consistency indicates the tolerable level of file system consistency
    // Note that CONSISTENT is exposed in Go as MountConsistencyFull.
    enum Consistency {
        option (gogoproto.goproto_enum_prefix) = false;
        option (gogoproto.enum_customname) = "MountConsistency";

        DEFAULT = 0 [(gogoproto.enumvalue_customname) = "MountConsistencyDefault"];
        CONSISTENT = 1 [(gogoproto.enumvalue_customname) = "MountConsistencyFull"];
        CACHED = 2 [(gogoproto.enumvalue_customname) = "MountConsistencyCached"];
        DELEGATED = 3 [(gogoproto.enumvalue_customname) = "MountConsistencyDelegated"];
    }
    // Consistency is the consistency level requested for this mount. It uses
    // field number 8 although it is declared before fields 5-7.
    Consistency consistency = 8;

    // BindOptions specifies options that are specific to a bind mount.
    message BindOptions {
        // Propagation enumerates the mount propagation modes. RPRIVATE is the
        // zero value and therefore the default.
        enum Propagation {
            option (gogoproto.goproto_enum_prefix) = false;
            option (gogoproto.enum_customname) = "MountPropagation";

            RPRIVATE = 0 [(gogoproto.enumvalue_customname) = "MountPropagationRPrivate"];
            PRIVATE = 1 [(gogoproto.enumvalue_customname) = "MountPropagationPrivate"];
            RSHARED = 2 [(gogoproto.enumvalue_customname) = "MountPropagationRShared"];
            SHARED = 3 [(gogoproto.enumvalue_customname) = "MountPropagationShared"];
            RSLAVE = 4 [(gogoproto.enumvalue_customname) = "MountPropagationRSlave"];
            SLAVE = 5 [(gogoproto.enumvalue_customname) = "MountPropagationSlave"];
        }

        // Propagation mode of mount.
        Propagation propagation = 1;
        // allows non-recursive bind-mount, i.e. mount(2) with "bind" rather than "rbind".
        bool nonrecursive = 2 [(gogoproto.customname) = "NonRecursive"];
        // Create the mount point
        bool createmountpoint = 3 [(gogoproto.customname) = "CreateMountpoint"];
        // ReadOnlyNonRecursive makes the mount non-recursively read-only, but still leaves the mount recursive
        // (unless NonRecursive is set to true in conjunction).
        bool readonlynonrecursive = 4 [(gogoproto.customname) = "ReadOnlyNonRecursive"];
        // ReadOnlyForceRecursive raises an error if the mount cannot be made recursively read-only.
        bool readonlyforcerecursive = 5 [(gogoproto.customname) = "ReadOnlyForceRecursive"];
    }

    // VolumeOptions contains parameters for mounting the volume.
    message VolumeOptions {
        // nocopy prevents automatic copying of data to the volume with data from target
        bool nocopy = 1 [(gogoproto.customname) = "NoCopy"];

        // labels to apply to the volume if creating
        map<string, string> labels = 2;

        // DriverConfig specifies the options that may be passed to the driver
        // if the volume is created.
        //
        // If this is empty, no volume will be created if the volume is missing.
        Driver driver_config = 3;

        // subpath inside the volume to mount.
        string subpath = 4 [(gogoproto.customname) = "Subpath"];
    }

    // TmpfsOptions contains the options specific to a tmpfs mount.
    message TmpfsOptions {
        // Size sets the size of the tmpfs, in bytes.
        //
        // This will be converted to an operating system specific value
        // depending on the host. For example, on linux, it will be converted to
        // use a 'k', 'm' or 'g' syntax. BSD, though not widely supported with
        // docker, uses a straight byte value.
        //
        // Percentages are not supported.
        int64 size_bytes = 1;

        // Mode of the tmpfs upon creation
        // Carried as uint32 on the wire; the generated Go field uses the
        // custom type os.FileMode and is non-nullable.
        uint32 mode = 2 [(gogoproto.customtype) = "os.FileMode", (gogoproto.nullable) = false];

        // Options passed to tmpfs mount
        string options = 3;
        // TODO(stevvooe): There are several more tmpfs flags, specified in the
        // daemon, that are accepted. Only the most basic are added for now.
        //
        // From docker/docker/pkg/mount/flags.go:
        //
        // var validFlags = map[string]bool{
        //     "":          true,
        //     "size":      true, X
        //     "mode":      true, X
        //     "uid":       true,
        //     "gid":       true,
        //     "nr_inodes": true,
        //     "nr_blocks": true,
        //     "mpol":      true,
        // }
        //
        // Some of these may be straightforward to add, but others, such as
        // uid/gid have implications in a clustered system.
    }

    // Depending on type, one of bind_options, volume_options or tmpfs_options
    // will be set.

    // BindOptions configures properties of a bind mount type.
    //
    // For mounts of type bind, the source must be an absolute host path.
    BindOptions bind_options = 5;

    // VolumeOptions configures the properties specific to a volume mount type.
    //
    // For mounts of type volume, the source will be used as the volume name.
    VolumeOptions volume_options = 6;

    // TmpfsOptions allows one to set options for mounting a temporary
    // filesystem.
    //
    // The source field will be ignored when using mounts of type tmpfs.
    TmpfsOptions tmpfs_options = 7;

    // TODO(stevvooe): It would be better to use a oneof field above, although
    // the type is enough to make the decision, while being primary to the
    // datastructure.
}

// RestartPolicy configures restarts: the condition that triggers a restart,
// the delay between attempts, the maximum number of attempts, and the time
// window over which the policy is evaluated.
message RestartPolicy {
    // RestartCondition enumerates the conditions under which a restart is
    // attempted. NONE is the zero value and therefore the default.
    // NOTE(review): going by the Go names, NONE means never restart,
    // ON_FAILURE restart only after a failure, ANY restart in every case —
    // confirm against the orchestrator.
    enum RestartCondition {
        option (gogoproto.goproto_enum_prefix) = false;
        option (gogoproto.enum_customname) = "RestartCondition";
        NONE = 0 [(gogoproto.enumvalue_customname) = "RestartOnNone"];
        ON_FAILURE = 1 [(gogoproto.enumvalue_customname) = "RestartOnFailure"];
        ANY = 2 [(gogoproto.enumvalue_customname) = "RestartOnAny"];
    }

    // Condition selects when a restart is attempted.
    RestartCondition condition = 1;

    // Delay between restart attempts
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration delay = 2;

    // MaxAttempts is the maximum number of restarts to attempt on an
    // instance before giving up. Ignored if 0.
    uint64 max_attempts = 3;

    // Window is the time window used to evaluate the restart policy.
    // The time window is unbounded if this is 0.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration window = 4;
}

// UpdateConfig specifies the rate and policy of updates.
// TODO(aluzzardi): Consider making this a oneof with RollingStrategy and LockstepStrategy.
message UpdateConfig {
    // Maximum number of tasks to be updated in one iteration.
    // 0 means unlimited parallelism.
    uint64 parallelism = 1;

    // Amount of time between updates.
    // Unlike the other durations in this message, this one is non-nullable
    // and is generated as a standard-library duration (stdduration).
    google.protobuf.Duration delay = 2 [(gogoproto.stdduration) = true, (gogoproto.nullable) = false];

    // FailureAction enumerates what can happen when an update fails.
    // PAUSE is the zero value and therefore the default.
    enum FailureAction {
        // No more tasks are updated until another update is started.
        PAUSE = 0;
        // Failures have no effect; the update carries on.
        CONTINUE = 1;
        // The orchestrator attempts to roll back to the previous service spec.
        ROLLBACK = 2;
    }

    // FailureAction is the action to take when an update fails.
    FailureAction failure_action = 3;

    // Monitor indicates how long to monitor a task for failure after it is
    // created. If the task fails by ending up in one of the states
    // REJECTED, COMPLETED, or FAILED, within Monitor from its creation,
    // this counts as a failure. If it fails after Monitor, it does not
    // count as a failure. If Monitor is unspecified, a default value will
    // be used.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration monitor = 4;

    // MaxFailureRatio is the fraction of tasks that may fail during
    // an update before the failure action is invoked. Any task created by
    // the current update which ends up in one of the states REJECTED,
    // COMPLETED or FAILED within Monitor from its creation counts as a
    // failure. The number of failures is divided by the number of tasks
    // being updated, and if this fraction is greater than
    // MaxFailureRatio, the failure action is invoked.
    //
    // If the failure action is CONTINUE, there is no effect.
    // If the failure action is PAUSE, no more tasks will be updated until
    // another update is started.
    // If the failure action is ROLLBACK, the orchestrator will attempt to
    // roll back to the previous service spec. If the MaxFailureRatio
    // threshold is hit during the rollback, the rollback will pause.
    float max_failure_ratio = 5;

    // UpdateOrder controls the order of operations when rolling out an
    // updated task. Either the old task is shut down before the new task
    // is started, or the new task is started before the old task is shut
    // down.
    // STOP_FIRST is the zero value and therefore the default.
    enum UpdateOrder {
        STOP_FIRST = 0;
        START_FIRST = 1;
    }

    // Order is the update order to use.
    UpdateOrder order = 6;
}

// UpdateStatus is the status of an update in progress.
message UpdateStatus {
    // UpdateState enumerates the states an update can be in.
    // UNKNOWN is the zero value and therefore the default.
    enum UpdateState {
        UNKNOWN = 0;
        UPDATING = 1;
        PAUSED = 2;
        COMPLETED = 3;
        ROLLBACK_STARTED = 4;
        ROLLBACK_PAUSED = 5; // if a rollback fails
        ROLLBACK_COMPLETED = 6;
    }

    // State is the state of this update. It indicates whether the
    // update is in progress, completed, paused, rolling back, or
    // finished rolling back.
    UpdateState state = 1;

    // StartedAt is the time at which the update was started.
    // Note: can't use stdtime because this field is nullable.
    google.protobuf.Timestamp started_at = 2;

    // CompletedAt is the time at which the update completed successfully,
    // paused, or finished rolling back.
    // Note: can't use stdtime because this field is nullable.
    google.protobuf.Timestamp completed_at = 3;

    // TODO(aaronl): Consider adding a timestamp showing when the most
    // recent task update took place. Currently, this is nontrivial
    // because each service update kicks off a replacement update, so
    // updating the service object with a timestamp at every step along
    // the rolling update would cause the rolling update to be constantly
    // restarted.

    // Message explains how the update got into its current state. For
    // example, if the update is paused, it will explain what is preventing
    // the update from proceeding (typically the failure of a task to start up
    // when OnFailure is PAUSE).
    string message = 4;
}

// TaskState enumerates the states that a task progresses through within an
// agent. States are designed to be monotonically increasing, such that if two
// states are seen by a task, the greater of the two represents the true state.
//
// Only the manager creates a NEW task and moves the task to PENDING and
// ASSIGNED. Afterward, the manager must rely on the agent to update the task
// status
// (pre-run: preparing, ready, starting;
//  running;
//  end-state: complete, shutdown, failed, rejected)
enum TaskState {
    // TODO(aluzzardi): Move it back into `TaskStatus` because of the naming
    // collisions of enums.

    // The default Go enum prefix is disabled and every value is given an
    // explicit Go name of the form TaskState<Name>. Note that COMPLETE is
    // exposed as TaskStateCompleted.
    option (gogoproto.goproto_enum_prefix) = false;
    option (gogoproto.enum_customname) = "TaskState";
    NEW = 0 [(gogoproto.enumvalue_customname)="TaskStateNew"];
    PENDING = 64 [(gogoproto.enumvalue_customname)="TaskStatePending"]; // waiting for scheduling decision
    ASSIGNED = 192 [(gogoproto.enumvalue_customname)="TaskStateAssigned"];
    ACCEPTED = 256 [(gogoproto.enumvalue_customname)="TaskStateAccepted"]; // task has been accepted by an agent.
    PREPARING = 320 [(gogoproto.enumvalue_customname)="TaskStatePreparing"];
    READY = 384 [(gogoproto.enumvalue_customname)="TaskStateReady"];
    STARTING = 448 [(gogoproto.enumvalue_customname)="TaskStateStarting"];
    RUNNING = 512 [(gogoproto.enumvalue_customname)="TaskStateRunning"];
    COMPLETE = 576 [(gogoproto.enumvalue_customname)="TaskStateCompleted"]; // successful completion of task (not error code, just ran)
    SHUTDOWN = 640 [(gogoproto.enumvalue_customname)="TaskStateShutdown"]; // orchestrator requested shutdown
    FAILED = 704 [(gogoproto.enumvalue_customname)="TaskStateFailed"]; // task execution failed with error
    // TaskStateRejected means a task never ran, for instance if something about
    // the environment failed (e.g. setting up a port on that node failed).
    REJECTED = 768 [(gogoproto.enumvalue_customname)="TaskStateRejected"]; // task could not be executed here.
    // TaskStateRemove is used to correctly handle service deletions and scale
    // downs. This allows us to keep track of tasks that have been marked for
    // deletion, but can't yet be removed because the agent is in the process of
    // shutting them down. Once the agent has shut down tasks with desired state
    // REMOVE, the task reaper is responsible for removing them.
    REMOVE = 800 [(gogoproto.enumvalue_customname)="TaskStateRemove"];
    // TaskStateOrphaned is used to free up resources associated with service
    // tasks on unresponsive nodes without having to delete those tasks. This
    // state is directly assigned to the task by the orchestrator.
    ORPHANED = 832 [(gogoproto.enumvalue_customname)="TaskStateOrphaned"];

    // NOTE(stevvooe): The state of a task is actually a lamport clock, in that
    // given two observations, the greater of the two can be considered
    // correct. To enforce this, we only allow tasks to proceed to a greater
    // state.
    //
    // A byproduct of this design decision is that we must also maintain this
    // invariant in the protobuf enum values, such that when comparing two
    // values, the one with the greater value is also the greater state.
    //
    // Because we may want to add intervening states at a later date, we've
    // left 64 spaces between each one. This should allow us to make 5 or 6
    // insertions between each state if we find that we made a mistake and need
    // another state.
    //
    // The spacing is not uniform in the values as declared: PENDING (64) and
    // ASSIGNED (192) are 128 apart, while REJECTED (768), REMOVE (800) and
    // ORPHANED (832) are only 32 apart.
    //
    // Remove this message when the states are deemed perfect.
}

// Container specific status.
message ContainerStatus {
    // ContainerID is the identifier of the container.
    string container_id = 1;

    // PID is the process ID associated with the container.
    // NOTE(review): presumably the container's main process as seen on the
    // host — confirm with the executor.
    int32 pid = 2 [(gogoproto.customname) = "PID"];
    // ExitCode is the exit code reported for the container.
    int32 exit_code = 3;
}

// PortStatus specifies the actual allocated runtime state of a list
// of port configs.
message PortStatus {
    // Ports is the list of port configs in their allocated state.
    repeated PortConfig ports = 1;
}

// TaskStatus reports the state of a task together with a timestamp, a
// message, an error, runtime-specific status, port status, and details of
// which manager applied the status and when.
message TaskStatus {
    // Timestamp is the time associated with this status.
    // NOTE(review): presumably when the state was observed — confirm.
    // Note: can't use stdtime because this field is nullable.
    google.protobuf.Timestamp timestamp = 1;

    // State expresses the current state of the task.
    TaskState state = 2;

    // Message reports a message for the task status. This should provide a
    // human readable message that can point to how the task actually arrived
    // at a current state.
    //
    // As a convention, we place a small message here that led to the
    // current state. For example, if the task is in ready, because it was
    // prepared, we'd place "prepared" in this field. If we skipped preparation
    // because the task is prepared, we would put "already prepared" in this
    // field.
    string message = 3;

    // Err is set if the task is in an error state, or is unable to
    // progress from an earlier state because a precondition is
    // unsatisfied.
    //
    // The following states should report a companion error:
    //
    //    FAILED, REJECTED
    //
    // In general, messages that should be surfaced to users belong in the
    // Err field, and notes on routine state transitions belong in Message.
    //
    // TODO(stevvooe) Integrate this field with the error interface.
    string err = 4;

    // Container status contains container specific status information.
    // It is currently the only member of the runtime_status oneof.
    oneof runtime_status {
        ContainerStatus container = 5;
    }

    // PortStatus provides a list of ports allocated at the host
    // level.
    PortStatus port_status = 6;

    // AppliedBy gives the node ID of the manager that applied this task
    // status update to the Task object.
    string applied_by = 7;

    // AppliedAt gives a timestamp of when this status update was applied to
    // the Task object.
    // Note: can't use stdtime because this field is nullable.
    google.protobuf.Timestamp applied_at = 8;
}

// NetworkAttachmentConfig specifies how a service should be attached to a particular network.
//
// For now, this is a simple struct, but this can include future information
// instructing Swarm on how this service should work on the particular
// network.
message NetworkAttachmentConfig {
    // Target specifies the target network for attachment. This value must be a
    // network ID.
    string target = 1;
    // Aliases specifies a list of discoverable alternate names for the service on this Target.
    repeated string aliases = 2;
    // Addresses specifies a list of ipv4 and ipv6 addresses
    // preferred. If these addresses are not available then the
    // attachment might fail.
    // The addresses are carried as plain strings.
    repeated string addresses = 3;
    // DriverAttachmentOpts is a map of driver attachment options for the network target
    map<string, string> driver_attachment_opts = 4;
}

// IPAMConfig specifies parameters for IP Address Management.
message IPAMConfig {
    // TODO(stevvooe): It may make more sense to manage IPAM and network
    // definitions separately. This will allow multiple networks to share IPAM
    // instances. For now, we will follow the conventions of libnetwork and
    // specify this as part of the network specification.

    // AddressFamily specifies the network address family that
    // this IPAMConfig belongs to.
    // The non-zero values match the IP version number (4 and 6).
    enum AddressFamily {
        UNKNOWN = 0; // satisfy proto3
        IPV4 = 4;
        IPV6 = 6;
    }

    // Family is the address family of this configuration.
    AddressFamily family = 1;

    // Subnet defines a network as a CIDR address (ie network and mask
    // 192.168.0.1/24).
    string subnet = 2;

    // Range defines the portion of the subnet to allocate to tasks. This is
    // defined as a subnet within the primary subnet.
    string range = 3;

    // Gateway address within the subnet.
    string gateway = 4;

    // Reserved is a list of addresses from the master pool that should *not*
    // be allocated. These addresses may have already been allocated or may be
    // reserved for another allocation manager.
    // NOTE(review): the field is declared as a string-to-string map; what the
    // keys and values each hold is not documented here — confirm with the
    // allocator.
    map<string, string> reserved = 5;
}

// PortConfig specifies an exposed port which can be
// addressed using the given name. This can be later queried
// using a service discovery api or a DNS SRV query. The node
// port specifies a port that can be used to address this
// service external to the cluster by sending a connection
// request to this port to any node on the cluster.
message PortConfig {
    // Protocol enumerates the transport protocols a port can use.
    // TCP is the zero value and therefore the default.
    enum Protocol {
        option (gogoproto.goproto_enum_prefix) = false;

        TCP = 0 [(gogoproto.enumvalue_customname) = "ProtocolTCP"];
        UDP = 1 [(gogoproto.enumvalue_customname) = "ProtocolUDP"];
        SCTP = 2 [(gogoproto.enumvalue_customname) = "ProtocolSCTP"];
    }

    // PublishMode controls how ports are published on the swarm.
    // INGRESS is the zero value and therefore the default.
    enum PublishMode {
        option (gogoproto.enum_customname) = "PublishMode";
        option (gogoproto.goproto_enum_prefix) = false;

        // PublishModeIngress exposes the port across the cluster on all nodes.
        INGRESS = 0 [(gogoproto.enumvalue_customname) = "PublishModeIngress"];

        // PublishModeHost exposes the port on just the target host.  If the
        // published port is undefined, an ephemeral port will be allocated. If
        // the published port is defined, the node will attempt to allocate it,
        // erroring the task if it fails.
        HOST = 1 [(gogoproto.enumvalue_customname) = "PublishModeHost"];
    }

    // Name for the port. If provided the port information can
    // be queried using the name as in a DNS SRV query.
    string name = 1;

    // Protocol for the port which is exposed.
    Protocol protocol = 2;

    // The port which the application is exposing and is bound to.
    uint32 target_port = 3;

    // PublishedPort specifies the port on which the service is exposed. If
    // specified, the port must be within the available range. If not specified
    // (value is zero), an available port is automatically assigned.
    uint32 published_port = 4;

    // PublishMode controls how the port is published.
    PublishMode publish_mode = 5;
}

// Driver is the generic driver type used throughout the API: a driver name
// paired with a set of string options. How the fields are interpreted depends
// on the target use case and on the driver application, so a network driver,
// for example, may follow different rules than a volume driver.
message Driver {
    // Name identifies the driver.
    string name = 1;

    // Options holds the driver-specific key/value settings.
    map<string, string> options = 2;
}

// IPAMOptions pairs an IPAM driver with a list of IPAM configurations.
message IPAMOptions {
    // Driver is the IPAM driver and its options.
    Driver driver = 1;

    // NOTE(review): field number 2 is not used in this message. If it once
    // belonged to a removed field it should be declared `reserved` — confirm
    // against the file history.

    // Configs is the list of IPAM configurations.
    repeated IPAMConfig configs = 3;
}

// Peer should be used anywhere where we are describing a remote peer.
message Peer {
    // NodeID is the ID of the peer's node.
    string node_id = 1;
    // Addr is the address of the peer.
    // NOTE(review): the address format (e.g. host:port) is not stated here;
    // confirm with callers.
    string addr = 2;
}

// WeightedPeer should be used anywhere where we are describing a remote peer
// with a weight.
message WeightedPeer {
    // Peer is the remote peer being described.
    Peer peer = 1;
    // Weight is the weight attached to the peer.
    // NOTE(review): how the weight is interpreted is not visible here —
    // confirm with the consumer.
    int64 weight = 2;
}


// IssuanceStatus reports the state of a certificate issuance, together with
// an error string when the issuance failed.
message IssuanceStatus {
    // State enumerates the certificate issuance states. The default Go enum
    // prefix is disabled and each value has an explicit IssuanceState<Name>
    // Go name.
    enum State {
        option (gogoproto.goproto_enum_prefix) = false;

        UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "IssuanceStateUnknown"];
        // A new certificate should be issued
        RENEW = 1 [(gogoproto.enumvalue_customname)="IssuanceStateRenew"];
        // Certificate is pending acceptance
        PENDING = 2 [(gogoproto.enumvalue_customname)="IssuanceStatePending"];
        // successful completion certificate issuance
        ISSUED = 3 [(gogoproto.enumvalue_customname)="IssuanceStateIssued"];
        // Certificate issuance failed
        FAILED = 4 [(gogoproto.enumvalue_customname)="IssuanceStateFailed"];
        // Signals workers to renew their certificate. From the CA's perspective
        // this is equivalent to IssuanceStateIssued: a noop.
        ROTATE = 5 [(gogoproto.enumvalue_customname)="IssuanceStateRotate"];
    }
    // State is the current issuance state.
    State state = 1;

    // Err is set if the Certificate Issuance is in an error state.
    // The following states should report a companion error:
    //    FAILED
    string err = 2;
}

// AcceptancePolicy is a list of admission policies, one entry per
// RoleAdmissionPolicy.
message AcceptancePolicy {
    // RoleAdmissionPolicy describes the admission rules for one node role:
    // whether certificates are auto-accepted and which secret is required.
    message RoleAdmissionPolicy {
        // Secret holds the join secret, either hashed or in plaintext, along
        // with the name of the algorithm used.
        message Secret {
            // The actual content (possibly hashed)
            bytes data = 1;
            // The type of hash we are using, or "plaintext"
            string alg = 2;
        }

        // Role is the node role this policy applies to (NodeRole is declared
        // outside this part of the file).
        NodeRole role = 1;
        // Autoaccept controls which roles' certificates are automatically
        // issued without administrator intervention.
        bool autoaccept = 2;
        // Secret represents a user-provided string that is necessary for new
        // nodes to join the cluster
        Secret secret = 3;
    }

    // Policies is the list of per-role admission policies.
    repeated RoleAdmissionPolicy policies = 1;
}

// ExternalCA describes an external certificate authority: the protocol it
// speaks, its URL, additional options, and the root CA certificate it uses.
message ExternalCA {
    // CAProtocol enumerates the protocols supported for external CAs.
    // CFSSL is the only value and, being zero, also the default.
    enum CAProtocol {
        CFSSL = 0 [(gogoproto.enumvalue_customname) = "CAProtocolCFSSL"];
    }

    // Protocol is the protocol used by this external CA.
    CAProtocol protocol = 1;

    // URL is the URL where the external CA can be reached.
    string url = 2 [(gogoproto.customname) = "URL"];

    // Options is a set of additional key/value pairs whose interpretation
    // depends on the specified CA type.
    map<string, string> options = 3;

    // CACert specifies which root CA is used by this external CA
    bytes ca_cert = 4 [(gogoproto.customname) = "CACert"];
}

// CAConfig holds the certificate authority settings: node certificate
// expiry, external CAs, the desired signing CA certificate and key, and a
// counter used to force a root CA rotation.
message CAConfig {
    // NodeCertExpiry is the duration certificates should be issued for
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration node_cert_expiry = 1;

    // ExternalCAs is a list of CAs to which a manager node will make
    // certificate signing requests for node certificates.
    repeated ExternalCA external_cas = 2 [(gogoproto.customname) = "ExternalCAs"];

    // SigningCACert is the desired CA certificate to be used as the root and
    // signing CA for the swarm.  If not provided, indicates that we are either happy
    // with the current configuration, or (together with a bump in the ForceRotate value)
    // that we want a certificate and key generated for us.
    bytes signing_ca_cert = 3 [(gogoproto.customname) = "SigningCACert"];

    // SigningCAKey is the desired private key, matching the signing CA cert, to be used
    // to sign certificates for the swarm
    bytes signing_ca_key = 4 [(gogoproto.customname) = "SigningCAKey"];

    // ForceRotate is a counter that triggers a root CA rotation even if no relevant
    // parameters have been changed in the spec. This will force the manager to generate
    // a new certificate and key, if none have been provided.
    uint64 force_rotate = 5;
}

// OrchestrationConfig defines cluster-level orchestration settings.
message OrchestrationConfig {
    // TaskHistoryRetentionLimit is the number of historic tasks to keep per
    // instance or node. If negative, completed or failed tasks are never
    // removed.
    int64 task_history_retention_limit = 1;

}

// TaskDefaults specifies default values for task creation.
message TaskDefaults {
    // LogDriver specifies the log driver to use for the cluster if one is
    // not specified for an individual task.
    //
    // If this is changed, only new tasks will pick up the new log driver.
    // Existing tasks will continue to use the previous default until they
    // are rescheduled.
    Driver log_driver = 1;
}

// DispatcherConfig defines cluster-level dispatcher settings.
message DispatcherConfig {
    // HeartbeatPeriod defines how often an agent should send heartbeats to
    // the dispatcher.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration heartbeat_period = 1;
}

// RaftConfig defines raft settings for the cluster.
message RaftConfig {
    // SnapshotInterval is the number of log entries between snapshots.
    uint64 snapshot_interval = 1;
    // KeepOldSnapshots is the number of snapshots to keep beyond the
    // current snapshot.
    uint64 keep_old_snapshots = 2;
    // LogEntriesForSlowFollowers is the number of log entries to keep
    // around to sync up slow followers after a snapshot is created.
    uint64 log_entries_for_slow_followers = 3;
    // HeartbeatTick defines the number of ticks (in seconds) between
    // each heartbeat message sent to other members for health-check.
    uint32 heartbeat_tick = 4;
    // ElectionTick defines the number of ticks (in seconds) needed
    // without a leader to trigger a new election.
    uint32 election_tick = 5;
}

// EncryptionConfig holds settings for encryption at rest of manager data.
message EncryptionConfig {
    // AutoLockManagers specifies whether or not managers' TLS keys and raft
    // data should be encrypted at rest in such a way that they must be
    // unlocked before the manager node starts up again.
    bool auto_lock_managers = 1;
}

// SpreadOver names the label descriptor that a spread placement preference
// applies to. It is currently the only variant of PlacementPreference.
message SpreadOver {
    // SpreadDescriptor is a label descriptor, such as engine.labels.az.
    string spread_descriptor = 1;
    // TODO: support node information beyond engine and node labels

    // TODO: in the future, add a map that provides weights for weighted
    // spreading.
}

// PlacementPreference is a single entry in Placement.preferences. Exactly
// one variant of the Preference oneof may be set; SpreadOver is the only
// variant defined so far.
message PlacementPreference {
    // Preference selects the kind of placement preference.
    oneof Preference {
        SpreadOver spread = 1;
    }
}

// Placement specifies task distribution constraints.
message Placement {
    // Constraints specifies a set of requirements a node should meet for a
    // task.
    repeated string constraints = 1;

    // Preferences provide a way to make the scheduler aware of factors
    // such as topology. They are provided in order from highest to lowest
    // precedence.
    repeated PlacementPreference preferences = 2;

    // Platforms stores all the platforms that the image can run on.
    // This field is used in the platform filter for scheduling. If empty,
    // then the platform filter is off, meaning there are no scheduling
    // restrictions.
    repeated Platform platforms = 3;

    // MaxReplicas specifies the limit for the maximum number of replicas
    // running on one node.
    uint64 max_replicas = 4;
}

// JoinTokens contains the join tokens for workers and managers.
message JoinTokens {
    // Worker is the join token workers may use to join the swarm.
    string worker = 1;

    // Manager is the join token managers may use to join the swarm.
    string manager = 2;
}

// RootCA holds the root CA key material, its digest, the join tokens, and
// the state of any root rotation in progress.
message RootCA {
    // CAKey is the root CA private key.
    bytes ca_key = 1 [(gogoproto.customname) = "CAKey"];

    // CACert is the root CA certificate.
    bytes ca_cert = 2 [(gogoproto.customname) = "CACert"];

    // CACertHash is the digest of the CA Certificate.
    string ca_cert_hash = 3 [(gogoproto.customname) = "CACertHash"];

    // JoinTokens contains the join tokens for workers and managers.
    JoinTokens join_tokens = 4 [(gogoproto.nullable) = false];

    // RootRotation contains the new root cert and key we want to rotate to.
    // If this is nil, we are not in the middle of a root rotation.
    RootRotation root_rotation = 5;

    // LastForcedRotation matches the Cluster Spec's CAConfig's ForceRotate counter.
    // It indicates when the current CA cert and key were generated (or updated).
    uint64 last_forced_rotation = 6;
}


// NodeRole enumerates the roles a node may have: worker or manager.
// WORKER is the zero value and therefore the default.
enum NodeRole {
    option (gogoproto.enum_customname) = "NodeRole";
    option (gogoproto.goproto_enum_prefix) = false;

    WORKER = 0 [(gogoproto.enumvalue_customname) = "NodeRoleWorker"];
    MANAGER = 1 [(gogoproto.enumvalue_customname) = "NodeRoleManager"];
}

// Certificate groups a node's role, its CSR bytes, the issuance status, the
// certificate bytes, and the CN.
message Certificate {
    // Role is the NodeRole associated with this certificate.
    NodeRole role = 1;

    // CSR holds the raw CSR bytes.
    bytes csr = 2 [(gogoproto.customname) = "CSR"];

    // Status is the issuance status; it is always present (non-nullable).
    IssuanceStatus status = 3 [(gogoproto.nullable) = false];

    // Certificate holds the raw certificate bytes.
    // NOTE(review): presumably empty until issuance completes — confirm.
    bytes certificate = 4;

    // CN represents the node ID.
    string cn = 5 [(gogoproto.customname) = "CN"];
}


// EncryptionKey is a symmetric key used to encrypt inter-agent
// communication.
message EncryptionKey {
    // Agent subsystem the key is intended for. Example:
    // networking:gossip
    string subsystem = 1;

    // Encryption algorithm that can be implemented using this key.
    // AES_128_GCM is currently the only value.
    enum Algorithm {
        option (gogoproto.goproto_enum_prefix) = false;

        AES_128_GCM = 0;
    }

    // Algorithm is the encryption algorithm this key is meant for.
    Algorithm algorithm = 2;

    // Key is the raw key material.
    bytes key = 3;

    // Time stamp from the lamport clock of the key allocator to
    // identify the relative age of the key.
    uint64 lamport_time = 4;
}

// ManagerStatus provides information about the state of a manager in the
// cluster.
message ManagerStatus {
    // RaftID specifies the internal ID used by the manager in a raft
    // context. It can never be modified and is used only for informational
    // purposes.
    uint64 raft_id = 1;

    // Addr is the address advertised to raft.
    string addr = 2;

    // Leader is set to true if this node is the raft leader.
    bool leader = 3;

    // Reachability specifies whether this node is reachable.
    RaftMemberStatus.Reachability reachability = 4;
}

// FileTarget represents a specific target that is backed by a file.
message FileTarget {
    // Name represents the final filename in the filesystem.
    string name = 1;

    // UID represents the file UID.
    string uid = 2 [(gogoproto.customname) = "UID"];

    // GID represents the file GID.
    string gid = 3 [(gogoproto.customname) = "GID"];

    // Mode represents the FileMode of the file. It is generated as a
    // non-nullable os.FileMode in Go.
    uint32 mode = 4 [(gogoproto.customtype) = "os.FileMode", (gogoproto.nullable) = false];
}

// RuntimeTarget represents that this secret is _not_ mounted into the
// container, but is used for some other purpose by the container runtime.
//
// NOTE(review): the wording above says "secret", but the only user of
// RuntimeTarget visible nearby is ConfigReference.target — confirm whether
// "config" is meant.
//
// Currently, RuntimeTarget has no fields; it's just a placeholder.
message RuntimeTarget {}

// SecretReference is the linkage between a service and a secret that it uses.
message SecretReference {
    // SecretID represents the ID of the specific Secret that we're
    // referencing. This identifier exists so that SecretReferences don't leak
    // any information about the secret contents.
    string secret_id = 1;

    // SecretName is the name of the secret that this references, but this is
    // just provided for lookup/display purposes.  The secret in the reference
    // will be identified by its ID.
    string secret_name = 2;

    // Target specifies how this secret should be exposed to the task.
    // A file target is the only variant defined for secrets.
    oneof target {
        FileTarget file = 3;
    }
}

// ConfigReference is the linkage between a service and a config that it uses.
message ConfigReference {
    // ConfigID represents the ID of the specific Config that we're
    // referencing.
    string config_id = 1;

    // ConfigName is the name of the config that this references, but this is
    // just provided for lookup/display purposes. The config in the reference
    // will be identified by its ID.
    string config_name = 2;

    // Target specifies how this config should be exposed to the task:
    // either as a file, or as a runtime target that is not mounted into
    // the container.
    oneof target {
        FileTarget file = 3;
        RuntimeTarget runtime = 4;
    }
}

// BlacklistedCertificate is a record for a blacklisted certificate. It does
// not contain the certificate's CN, because these records are indexed by CN.
message BlacklistedCertificate {
    // Expiry is the latest known expiration time of a certificate that
    // was issued for the given CN.
    // Note: can't use stdtime because this field needs to be nullable.
    google.protobuf.Timestamp expiry = 1;
}

// HealthConfig holds configuration settings for the HEALTHCHECK feature.
message HealthConfig {
    // Test is the test to perform to check that the container is healthy.
    // An empty slice means to inherit the default.
    // The options are:
    // {} : inherit healthcheck
    // {"NONE"} : disable healthcheck
    // {"CMD", args...} : exec arguments directly
    // {"CMD-SHELL", command} : run command with system's default shell
    repeated string test = 1;

    // Interval is the time to wait between checks. Zero means inherit.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration interval = 2;

    // Timeout is the time to wait before considering the check to have hung.
    // Zero means inherit.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration timeout = 3;

    // Retries is the number of consecutive failures needed to consider a
    // container as unhealthy. Zero means inherit.
    int32 retries = 4;

    // StartPeriod is the period for container initialization during
    // which health check failures will not count towards the maximum
    // number of retries.
    google.protobuf.Duration start_period = 5;

    // StartInterval is the time to wait between checks during the start period.
    // Zero means inherit.
    // Note: can't use stdduration because this field needs to be nullable.
    google.protobuf.Duration start_interval = 6;

}

// MaybeEncryptedRecord wraps a data payload together with the algorithm that
// describes whether, and how, the payload is encrypted.
message MaybeEncryptedRecord {
    // Algorithm enumerates the supported encodings of the data. NONE (the
    // zero value, and so the default) means the data is not encrypted.
    enum Algorithm {
        NONE = 0 [(gogoproto.enumvalue_customname) = "NotEncrypted"];
        SECRETBOX_SALSA20_POLY1305 = 1 [(gogoproto.enumvalue_customname) = "NACLSecretboxSalsa20Poly1305"];
        FERNET_AES_128_CBC = 2 [(gogoproto.enumvalue_customname) = "FernetAES128CBC"];
    }

    // Algorithm identifies how data is encoded.
    Algorithm algorithm = 1;
    // Data is the payload, encrypted or not according to Algorithm.
    bytes data = 2;
    // Nonce is presumably the nonce used when encrypting data, and unused
    // when Algorithm is NONE — TODO confirm against the encryption code.
    bytes nonce = 3;
}


// RootRotation contains the new root CA cert and key to rotate to, plus the
// new cert cross-signed by the previous root. It is referenced from
// RootCA.root_rotation.
message RootRotation {
    // CACert is the new root CA certificate.
    bytes ca_cert = 1 [(gogoproto.customname) = "CACert"];
    // CAKey is the new root CA private key.
    bytes ca_key = 2 [(gogoproto.customname) = "CAKey"];
    // CrossSignedCACert is the CACert that has been cross-signed by the
    // previous root.
    bytes cross_signed_ca_cert = 3 [(gogoproto.customname) = "CrossSignedCACert"];
}

// Privileges specifies security configuration/permissions.
message Privileges {
    // CredentialSpec for managed service account (Windows only).
    message CredentialSpec {
        // Source selects where the credential spec is loaded from: a file,
        // a registry entry, or a swarm Config.
        oneof source {
            string file = 1;
            string registry = 2;

            // Config represents a Config ID from which to get the CredentialSpec.
            // The Config MUST be included in the ConfigReferences with a
            // RuntimeTarget.
            string config = 3;
        }
    }
    // CredentialSpec is the credential spec to use (Windows only).
    CredentialSpec credential_spec = 1;

    // SELinuxContext contains the SELinux labels for the container.
    message SELinuxContext {
        // Disable, if true, presumably turns off SELinux labeling for the
        // container — TODO confirm against the executor.
        bool disable = 1;

        // User, Role, Type and Level are the SELinux label components.
        string user = 2;
        string role = 3;
        string type = 4;
        string level = 5;
    }
    // SELinuxContext is the SELinux context to apply to the container.
    SELinuxContext selinux_context = 2 [(gogoproto.customname) = "SELinuxContext"];

    // SeccompOpts contains options for configuring seccomp profiles on the
    // container. See https://docs.docker.com/engine/security/seccomp/ for more
    // information.
    message SeccompOpts {
        // SeccompMode selects the seccomp behavior. DEFAULT is the zero
        // value.
        enum SeccompMode {
            DEFAULT = 0;
            UNCONFINED = 1;
            CUSTOM =  2;
        }
        // Mode is the seccomp mode to use.
        SeccompMode mode = 1;
        // Profile contains the json definition of the seccomp profile to use,
        // if Mode is set to custom.
        bytes profile = 2;
    }
    // Seccomp holds the seccomp options for the container.
    SeccompOpts seccomp = 3;

    // AppArmorOpts contains options for configuring AppArmor profiles on the
    // container. Currently, custom profiles are not supported. See
    // https://docs.docker.com/engine/security/apparmor/ for more information.
    message AppArmorOpts {
        // AppArmorMode selects the AppArmor behavior. DEFAULT is the zero
        // value.
        enum AppArmorMode {
            DEFAULT = 0;
            DISABLED = 1;
        }
        // Mode is the AppArmor mode to use.
        AppArmorMode mode = 1;
    }
    // AppArmor holds the AppArmor options for the container.
    AppArmorOpts apparmor = 4;

    // NoNewPrivileges, if set to true, disables the container from gaining new
    // privileges. See https://docs.kernel.org/userspace-api/no_new_privs.html
    // for details.
    bool no_new_privileges = 5;
}

// JobStatus indicates the status of a Service that is in one of the Job modes.
message JobStatus {
    // JobIteration is the count of how many times the Job has been executed,
    // successfully or otherwise. "Executed" refers to the job as a whole being
    // started, not to the individual Tasks being launched. This is used to
    // disambiguate which Tasks belong to which iteration of a Job.
    Version job_iteration = 1 [(gogoproto.nullable) = false];

    // LastExecution is the time that the job was last executed. This is set by
    // the orchestrator in the same transaction that JobIteration is incremented.
    // While time is a fungible concept in distributed systems like Swarmkit,
    // this value gives us a best-effort attempt to prevent weird behavior like
    // newly added nodes executing long-forgotten jobs.
    google.protobuf.Timestamp last_execution = 2;
}

// VolumeAccessMode is the access mode of the volume, and is used to determine
// the CSI AccessMode value, as well as the volume access type (block vs
// mount). In this way, it is more similar to the CSI VolumeCapability message.
//
// This defines how and where a volume can be accessed by more than
// one Task, but does not imply anything about the accessible topology of the
// volume.
//
// For analogy, a flash drive can be used on many computers, but only one of
// them at a time, and so would have a scope of "Single". But, it can be used
// by any number of programs simultaneously, so would have a sharing of "All".
message VolumeAccessMode {
    // Scope enumerates the possible volume access scopes.
    enum Scope {
        option (gogoproto.goproto_enum_prefix) = false;
        // VolumeScopeSingleNode indicates that only one node at a time may have
        // access to the volume.
        SINGLE_NODE = 0 [(gogoproto.enumvalue_customname) = "VolumeScopeSingleNode"];
        // VolumeScopeMultiNode indicates that multiple nodes may access the volume
        // at the same time.
        MULTI_NODE = 1 [(gogoproto.enumvalue_customname) = "VolumeScopeMultiNode"];
    }

    // Sharing enumerates the possible volume sharing modes.
    enum Sharing {
        option (gogoproto.goproto_enum_prefix) = false;
        // VolumeSharingNone indicates that the volume may only be used by a single
        // Task at any given time.
        NONE = 0 [(gogoproto.enumvalue_customname) = "VolumeSharingNone"];
        // VolumeSharingReadOnly indicates that the volume may be accessed by
        // multiple Tasks, but all Tasks only have read access.
        READ_ONLY = 1 [(gogoproto.enumvalue_customname) = "VolumeSharingReadOnly"];
        // VolumeSharingOneWriter indicates that the Volume may be accessed by
        // multiple Tasks, but only one Task may have write permission for the
        // Volume.
        ONE_WRITER = 2 [(gogoproto.enumvalue_customname) = "VolumeSharingOneWriter"];
        // VolumeSharingAll indicates that any number of Tasks may have read and
        // write access to the volume.
        ALL = 3 [(gogoproto.enumvalue_customname) = "VolumeSharingAll"];
    }

    // BlockVolume indicates the volume will be accessed with the block device
    // API.
    message BlockVolume {
        // intentionally empty
    }

    // MountVolume indicates the volume will be accessed with the filesystem
    // API.
    message MountVolume {
        // FsType is the filesystem type. This field is optional, and an empty
        // string is equal to an unspecified value.
        string fs_type = 1;

        // MountFlags indicates mount options to be used for the volume. This
        // field is optional, and may contain sensitive data.
        repeated string mount_flags = 2;
    }

    // Scope defines on how many nodes this volume can be accessed
    // simultaneously. If unset, will default to the zero-value of SINGLE_NODE.
    Scope scope = 1;

    // Sharing defines how many tasks can use this volume at the same time, and
    // in what way. If unset, will default to the zero-value of NONE.
    Sharing sharing = 2;

    // AccessType defines the access type of the volume. Unlike Sharing and
    // Scope, Swarmkit itself doesn't define either of these as a default,
    // but the upstream is free to do so. However, one of these MUST be set.
    oneof access_type {
        BlockVolume block = 3;
        MountVolume mount = 4;
    }
}

// VolumeSecret indicates a secret value that must be passed to CSI plugin
// operations.
message VolumeSecret {
    // Key represents the key that will be passed as a controller secret to
    // the CSI plugin.
    string key = 1;

    // Secret represents the swarmkit Secret object from which to read the
    // data used as the value passed to the CSI plugin. This can be either a
    // secret name or ID.
    //
    // TODO(dperny): should this be a SecretReference instead?
    string secret = 2;
}

// VolumePublishStatus contains information about the volume's publishing to a
// specific node.
//
// Publishing or unpublishing a volume to a node is a two-step process.
//
// When a Volume is needed on a Node, a VolumePublishStatus with state
// PendingPublish is added. This indicates that the volume should be published,
// but the RPCs have not been executed.
//
// Then, afterward, ControllerPublishVolume is called for the Volume, and the
// State is changed to Published, indicating that the call was a success.
//
// When a Volume is no longer needed, the process is similar, with the State
// being changed to PendingUnpublish. When ControllerUnpublishVolume succeeds,
// the PublishStatus for that Node is simply removed.
//
// Without this two-step process, the following could happen:
//
//   1. ControllerPublishVolume is called and the Volume is successfully
//      published.
//   2. A crash or leadership change disrupts the cluster before
//      the Volume with the updated VolumePublishStatus can be added to the
//      store.
//   3. The Task that required the Volume to be published is deleted.
//
// In this case, the Volume would be published to the Node, but Swarm would be
// unaware of this, and would additionally be unaware that the Volume _should_
// be published to the Node.
//
// By first committing our intention to publish a Volume, we guarantee that the
// Volume itself is sufficient to know which Nodes it may have been published
// to.
message VolumePublishStatus {
    // State is the state of the volume in the publish/unpublish
    // lifecycle, on a particular node.
    enum State {
        // PendingPublish indicates that the volume should be published on this
        // node, but the call to ControllerPublishVolume has not been
        // successfully completed yet and the result recorded by swarmkit.
        PENDING_PUBLISH = 0;

        // Published means the volume is published successfully to the node.
        PUBLISHED = 1;

        // PendingNodeUnpublish indicates that the Volume should be unpublished
        // on the Node, and we're waiting for confirmation that it has done so.
        // After the Node has confirmed that the Volume has been unpublished,
        // the state will move to PendingUnpublish.
        PENDING_NODE_UNPUBLISH = 2;

        // PendingUnpublish means the volume is published to the node, and
        // needs to not be, but the call to ControllerUnpublishVolume has not
        // verifiably succeeded yet. There is no Unpublished state, because
        // after the volume has been verifiably unpublished, the
        // VolumePublishStatus for the node is removed.
        PENDING_UNPUBLISH = 3;
    }

    // NodeID is the swarm (not CSI plugin) node ID that this volume is
    // published to.
    string node_id = 1;

    // State is the publish state of the volume.
    State state = 2;

    // PublishContext is the same PublishContext returned by a call to
    // ControllerPublishVolume.
    map<string, string> publish_context = 3;

    // NOTE(review): field number 4 is skipped in this message. If it belonged
    // to a removed field it should be declared `reserved 4;` so it cannot be
    // reused — confirm against the file's history.

    // Message is a human-readable message explaining the state of the volume.
    // It exists to convey the current situation with the volume to the user,
    // allowing, for example, the user to see error messages why a volume might
    // not be published yet.
    string message = 5;
}

// VolumeInfo contains information about the volume originating from the CSI
// plugin.
message VolumeInfo {
    // CapacityBytes is the capacity of this volume in bytes. A value of 0
    // indicates that the capacity is unknown.
    int64 capacity_bytes = 1;

    // VolumeContext includes fields that are opaque to Swarmkit.
    map<string, string> volume_context = 2;

    // VolumeID is the ID of the volume as reported by the CSI plugin.
    // Information about the volume is not cached in swarmkit's object store;
    // instead, it is retrieved on-demand as needed. If the VolumeID field is an
    // empty string, and the plugin advertises CREATE_DELETE_VOLUME capability,
    // then Swarmkit has not yet called CreateVolume.
    string volume_id = 3;

    // AccessibleTopology is the topology this volume is actually accessible
    // from.
    repeated Topology accessible_topology = 4;
}

// CapacityRange describes the minimum and maximum capacity a volume should be
// created with.
message CapacityRange {
    // RequiredBytes specifies that a volume must be at least this big, in
    // bytes. The value of 0 indicates an unspecified minimum. Must not be
    // negative.
    int64 required_bytes = 1;

    // LimitBytes specifies that a volume must not be bigger than this, in
    // bytes. The value of 0 indicates an unspecified maximum. Must not be
    // negative.
    int64 limit_bytes = 2;
}

// VolumeAssignment contains the information needed by a Node to use a CSI
// volume. This includes the information needed to Stage and Publish the volume
// on the node, but never the full Volume object.
message VolumeAssignment {
    // ID is the swarmkit ID for the volume. This is used by swarmkit components
    // to identify the volume.
    string id = 1;

    // VolumeID is the CSI volume ID as returned from CreateVolume. This is used
    // by the CSI driver to identify the volume.
    string volume_id = 2;

    // Driver is the CSI Driver that this volume is managed by.
    Driver driver = 3;

    // VolumeContext is a map returned from the CSI Controller service when a
    // Volume is created. It is optional for the driver to provide, but if it is
    // provided, it must be passed to subsequent calls.
    map<string,string> volume_context = 4;

    // PublishContext is a map returned from the Controller service when
    // ControllerPublishVolume is called. Again, it is optional, but if provided,
    // must be passed.
    map<string,string> publish_context = 5;

    // AccessMode specifies the access mode of the volume.
    VolumeAccessMode access_mode = 6;

    // Secrets is the set of secrets required by the CSI plugin. These refer to
    // swarmkit Secrets that will be distributed separately to the node.
    repeated VolumeSecret secrets = 7;
}

// VolumeAttachment is the information associating a Volume with a Task.
message VolumeAttachment {
    // ID is the swarmkit ID of the volume assigned to this task. It is not
    // the CSI volume ID.
    string id = 1;

    // Source indicates the Mount source that this volume is assigned for.
    string source = 2;

    // Target indicates the Mount target that this volume is assigned for.
    string target = 3;
}


// These types are copied from the CSI spec. They are copied because there is
// difficulty in compatibility between the CSI protos and the swarmkit protos,
// and straight importing them is difficult.

// TopologyRequirement expresses the user's requirements for a volume's
// accessible topology.
message TopologyRequirement {
    // Specifies the list of topologies the provisioned volume MUST be
    // accessible from.
    // This field is OPTIONAL. If TopologyRequirement is specified, either
    // requisite or preferred or both MUST be specified.
    //
    // If requisite is specified, the provisioned volume MUST be
    // accessible from at least one of the requisite topologies.
    //
    // Given
    //   x = number of topologies provisioned volume is accessible from
    //   n = number of requisite topologies
    // The CO MUST ensure n >= 1. The SP MUST ensure x >= 1
    // If x==n, then the SP MUST make the provisioned volume available to
    // all topologies from the list of requisite topologies. If it is
    // unable to do so, the SP MUST fail the CreateVolume call.
    // For example, if a volume should be accessible from a single zone,
    // and requisite =
    //   {"region": "R1", "zone": "Z2"}
    // then the provisioned volume MUST be accessible from the "region"
    // "R1" and the "zone" "Z2".
    // Similarly, if a volume should be accessible from two zones, and
    // requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"}
    // then the provisioned volume MUST be accessible from the "region"
    // "R1" and both "zone" "Z2" and "zone" "Z3".
    //
    // If x<n, then the SP SHALL choose x unique topologies from the list
    // of requisite topologies. If it is unable to do so, the SP MUST fail
    // the CreateVolume call.
    // For example, if a volume should be accessible from a single zone,
    // and requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"}
    // then the SP may choose to make the provisioned volume available in
    // either the "zone" "Z2" or the "zone" "Z3" in the "region" "R1".
    // Similarly, if a volume should be accessible from two zones, and
    // requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"},
    //   {"region": "R1", "zone": "Z4"}
    // then the provisioned volume MUST be accessible from any combination
    // of two unique topologies: e.g. "R1/Z2" and "R1/Z3", or "R1/Z2" and
    // "R1/Z4", or "R1/Z3" and "R1/Z4".
    //
    // If x>n, then the SP MUST make the provisioned volume available from
    // all topologies from the list of requisite topologies and MAY choose
    // the remaining x-n unique topologies from the list of all possible
    // topologies. If it is unable to do so, the SP MUST fail the
    // CreateVolume call.
    // For example, if a volume should be accessible from two zones, and
    // requisite =
    //   {"region": "R1", "zone": "Z2"}
    // then the provisioned volume MUST be accessible from the "region"
    // "R1" and the "zone" "Z2" and the SP may select the second zone
    // independently, e.g. "R1/Z4".
    repeated Topology requisite = 1;

    // Specifies the list of topologies the CO would prefer the volume to
    // be provisioned in.
    //
    // This field is OPTIONAL. If TopologyRequirement is specified, either
    // requisite or preferred or both MUST be specified.
    //
    // An SP MUST attempt to make the provisioned volume available using
    // the preferred topologies in order from first to last.
    //
    // If requisite is specified, all topologies in preferred list MUST
    // also be present in the list of requisite topologies.
    //
    // If the SP is unable to make the provisioned volume available
    // from any of the preferred topologies, the SP MAY choose a topology
    // from the list of requisite topologies.
    // If the list of requisite topologies is not specified, then the SP
    // MAY choose from the list of all possible topologies.
    // If the list of requisite topologies is specified and the SP is
    // unable to make the provisioned volume available from any of the
    // requisite topologies it MUST fail the CreateVolume call.
    //
    // Example 1:
    // Given a volume should be accessible from a single zone, and
    // requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"}
    // preferred =
    //   {"region": "R1", "zone": "Z3"}
    // then the SP SHOULD first attempt to make the provisioned volume
    // available from "zone" "Z3" in the "region" "R1" and fall back to
    // "zone" "Z2" in the "region" "R1" if that is not possible.
    //
    // Example 2:
    // Given a volume should be accessible from a single zone, and
    // requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"},
    //   {"region": "R1", "zone": "Z4"},
    //   {"region": "R1", "zone": "Z5"}
    // preferred =
    //   {"region": "R1", "zone": "Z4"},
    //   {"region": "R1", "zone": "Z2"}
    // then the SP SHOULD first attempt to make the provisioned volume
    // accessible from "zone" "Z4" in the "region" "R1" and fall back to
    // "zone" "Z2" in the "region" "R1" if that is not possible. If that
    // is not possible, the SP may choose between either the "zone"
    // "Z3" or "Z5" in the "region" "R1".
    //
    // Example 3:
    // Given a volume should be accessible from TWO zones (because an
    // opaque parameter in CreateVolumeRequest, for example, specifies
    // the volume is accessible from two zones, aka synchronously
    // replicated), and
    // requisite =
    //   {"region": "R1", "zone": "Z2"},
    //   {"region": "R1", "zone": "Z3"},
    //   {"region": "R1", "zone": "Z4"},
    //   {"region": "R1", "zone": "Z5"}
    // preferred =
    //   {"region": "R1", "zone": "Z5"},
    //   {"region": "R1", "zone": "Z3"}
    // then the SP SHOULD first attempt to make the provisioned volume
    // accessible from the combination of the two "zones" "Z5" and "Z3" in
    // the "region" "R1". If that's not possible, it should fall back to
    // a combination of "Z5" and other possibilities from the list of
    // requisite. If that's not possible, it should fall back to a
    // combination of "Z3" and other possibilities from the list of
    // requisite. If that's not possible, it should fall back to a
    // combination of other possibilities from the list of requisite.
    repeated Topology preferred = 2;
}

// Topology is a map of topological domains to topological segments.
// A topological domain is a sub-division of a cluster, like "region",
// "zone", "rack", etc.
// A topological segment is a specific instance of a topological domain,
// like "zone3", "rack3", etc.
// For example {"com.company/zone": "Z1", "com.company/rack": "R3"}
// Valid keys have two segments: an OPTIONAL prefix and name, separated
// by a slash (/), for example: "com.company.example/zone".
// The key name segment is REQUIRED. The prefix is OPTIONAL.
// The key name MUST be 63 characters or less, begin and end with an
// alphanumeric character ([a-z0-9A-Z]), and contain only dashes (-),
// underscores (_), dots (.), or alphanumerics in between, for example
// "zone".
// The key prefix MUST be 63 characters or less, begin and end with a
// lower-case alphanumeric character ([a-z0-9]), contain only
// dashes (-), dots (.), or lower-case alphanumerics in between, and
// follow domain name notation format
// (https://tools.ietf.org/html/rfc1035#section-2.3.1).
// The key prefix SHOULD include the plugin's host company name and/or
// the plugin name, to minimize the possibility of collisions with keys
// from other plugins.
// If a key prefix is specified, it MUST be identical across all
// topology keys returned by the SP (across all RPCs).
// Keys MUST be case-insensitive. Meaning the keys "Zone" and "zone"
// MUST not both exist.
// Each value (topological segment) MUST contain 1 or more strings.
// Each string MUST be 63 characters or less and begin and end with an
// alphanumeric character with '-', '_', '.', or alphanumerics in
// between.
message Topology {
    // Segments maps each topological domain (key) to its topological
    // segment (value), subject to the key and value rules above.
    map<string, string> segments = 1;
}

// VolumeCapability specifies a capability of a volume: which API is used
// to access it (the `access_type` oneof) and how it may be published to
// nodes (`access_mode`).
message VolumeCapability {
    // Indicate that the volume will be accessed via the block device API.
    message BlockVolume {
        // Intentionally empty, for now.
    }

    // Indicate that the volume will be accessed via the filesystem API.
    message MountVolume {
        // The filesystem type. This field is OPTIONAL.
        // An empty string is equal to an unspecified field value.
        string fs_type = 1;

        // The mount options that can be used for the volume. This field is
        // OPTIONAL. `mount_flags` MAY contain sensitive information.
        // Therefore, the CO and the Plugin MUST NOT leak this information
        // to untrusted entities. The total size of this repeated field
        // SHALL NOT exceed 4 KiB.
        repeated string mount_flags = 2;
    }

    // Specify how a volume can be accessed.
    message AccessMode {
        // Mode enumerates the supported combinations of node count
        // (single vs. multiple) and access rights (readonly vs.
        // read/write) with which a volume can be published.
        enum Mode {
            // Zero value, and therefore what an unset `mode` decodes to
            // in proto3.
            // NOTE(review): since `mode` is documented as REQUIRED, this
            // presumably denotes an unspecified/invalid mode -- confirm
            // against the consumers of this message.
            UNKNOWN = 0;

            // Can only be published once as read/write on a single node, at
            // any given time.
            SINGLE_NODE_WRITER = 1;

            // Can only be published once as readonly on a single node, at
            // any given time.
            SINGLE_NODE_READER_ONLY = 2;

            // Can be published as readonly at multiple nodes simultaneously.
            MULTI_NODE_READER_ONLY = 3;

            // Can be published at multiple nodes simultaneously. Only one of
            // the nodes can be used as read/write. The rest will be readonly.
            MULTI_NODE_SINGLE_WRITER = 4;

            // Can be published as read/write at multiple nodes
            // simultaneously.
            MULTI_NODE_MULTI_WRITER = 5;
        }

        // The access mode of the volume. This field is REQUIRED.
        Mode mode = 1;
    }

    // Specifies what API the volume will be accessed using. One of the
    // following fields MUST be specified.
    oneof access_type {
        // The volume is accessed via the block device API.
        BlockVolume block = 1;
        // The volume is accessed via the filesystem API; carries the
        // filesystem type and mount flags.
        MountVolume mount = 2;
    }

    // How the volume can be accessed (see AccessMode).
    // This is a REQUIRED field.
    AccessMode access_mode = 3;
}