// api/types.proto
syntax = "proto3";
package docker.swarmkit.v1;
import "google/protobuf/timestamp.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/wrappers.proto";
import "gogoproto/gogo.proto";
// This file contains types that are common to objects and spec or that are not
// considered first-class within the cluster object-model.
// Version tracks the last time an object in the store was updated.
message Version {
  // index is the version counter carried by an object.
  // NOTE(review): presumably the store index at the object's last update —
  // confirm against the store implementation.
  uint64 index = 1;
}
// IndexEntry is a single key/value pair used for indexing an object (see
// Annotations.indices).
message IndexEntry {
  // key is the index key.
  string key = 1;

  // val is the value recorded under key.
  string val = 2;
}
// Annotations provide useful information to identify API objects. They are
// common to all API specs.
message Annotations {
  // Field number 3 is not assigned to any field in this message. It is
  // reserved so that a future field cannot pick it up by accident.
  // NOTE(review): confirm whether 3 was ever assigned in an earlier revision.
  reserved 3;

  // name is the name of the annotated object.
  string name = 1;

  // labels is a map of string keys to string values attached to the object.
  map<string, string> labels = 2;

  // Indices provides keys and values for indexing this object.
  // A single key may have multiple values.
  repeated IndexEntry indices = 4 [(gogoproto.nullable) = false];
}
// NamedGenericResource represents a "user defined" resource which is defined
// as a string.
// "Kind" is used to describe the Kind of a resource (e.g: "GPU", "FPGA", "SSD", ...)
// Value is used to identify the resource (GPU="UUID-1", FPGA="/dev/sdb5", ...)
message NamedGenericResource {
  // kind is the kind of the resource (e.g. "GPU", "FPGA", "SSD").
  string kind = 1;

  // value identifies the resource (e.g. "UUID-1" for a GPU).
  string value = 2;
}
// DiscreteGenericResource represents a "user defined" resource which is defined
// as an integer
// "Kind" is used to describe the Kind of a resource (e.g: "GPU", "FPGA", "SSD", ...)
// Value is used to count the resource (SSD=5, HDD=3, ...)
message DiscreteGenericResource {
  // kind is the kind of the resource (e.g. "GPU", "FPGA", "SSD").
  string kind = 1;

  // value counts the resource (e.g. 5 for SSD=5).
  int64 value = 2;
}
// GenericResource represents a "user defined" resource which can
// be either an integer (e.g: SSD=3) or a string (e.g: SSD=sda1)
message GenericResource {
  // resource holds at most one of the two representations below.
  oneof resource {
    // Resource identified by a string (e.g. SSD=sda1).
    NamedGenericResource named_resource_spec = 1;

    // Resource counted by an integer (e.g. SSD=3).
    DiscreteGenericResource discrete_resource_spec = 2;
  }
}
// ResourceType enumerates kinds of cluster resources.
// NOTE(review): the consumers of this enum are not visible in this part of
// the file — confirm the intended use before relying on a specific meaning.
enum ResourceType {
  TASK = 0;
  SECRET = 1;
  CONFIG = 2;
  VOLUME = 3;
}
// Resources describes an amount of CPU, memory, and user-defined generic
// resources.
message Resources {
  // Amount of CPU in billionths of a core (e.g. 2000000000 = 2 CPU cores).
  int64 nano_cpus = 1 [(gogoproto.customname) = "NanoCPUs"];

  // Amount of memory, in bytes.
  int64 memory_bytes = 2;

  // User-specified resources (e.g. bananas=2;apple={red,yellow,green}).
  repeated GenericResource generic = 3;
}
// ResourceRequirements groups resource limits and reservations together with
// the swap settings that accompany a memory limit.
message ResourceRequirements {
  // Resource limits.
  Resources limits = 1;

  // Resource reservations.
  Resources reservations = 2;

  // Amount of swap, in bytes. Can only be used together with a memory limit.
  //   * -1 means unlimited.
  //   * A null pointer keeps the default behaviour of granting twice the
  //     memory.
  google.protobuf.Int64Value swap_bytes = 3;

  // Container memory swappiness (0 to 100). If not specified, the container
  // OS's default applies (generally 60), or the value predefined in the
  // image. Set to -1 to unset a previously set value.
  google.protobuf.Int64Value memory_swappiness = 4;
}
// Platform identifies an architecture / operating system pair.
message Platform {
  // Architecture (e.g. x86_64).
  string architecture = 1;

  // Operating system (e.g. linux).
  string os = 2 [(gogoproto.customname) = "OS"];
}
// PluginDescription describes an engine plugin.
message PluginDescription {
  // Type of the plugin. The canonical values for existing types are Volume,
  // Network, and Authorization; more types could be supported in the future.
  string type = 1;

  // Name of the plugin.
  string name = 2;
}
// EngineDescription describes the Docker Engine running on a node.
message EngineDescription {
  // Version of the Docker daemon running on the node.
  string engine_version = 1;

  // Labels attached to the engine.
  map<string, string> labels = 2;

  // Volume, Network, and Auth plugins.
  repeated PluginDescription plugins = 3 [(gogoproto.nullable) = false];
}
// NodeDescription describes a node: its hostname, platform, resources,
// engine, TLS setup, FIPS mode and CSI plugin information.
message NodeDescription {
  // Hostname of the node as reported by the agent.
  // This is different from spec.meta.name, which is user-defined.
  string hostname = 1;

  // Platform of the node.
  Platform platform = 2;

  // Total resources on the node.
  Resources resources = 3;

  // Information about the Docker Engine on the node.
  EngineDescription engine = 4;

  // Information on the node's TLS setup.
  NodeTLSInfo tls_info = 5 [(gogoproto.customname) = "TLSInfo"];

  // FIPS indicates whether the node is FIPS-enabled.
  bool fips = 6 [(gogoproto.customname) = "FIPS"];

  // CSIInfo carries the CSI plugin information for the node; NodeCSIInfo is
  // documented as one object per CSI plugin present.
  repeated NodeCSIInfo csi_info = 7 [(gogoproto.customname) = "CSIInfo"];
}
// NodeTLSInfo carries information about a node's TLS setup.
message NodeTLSInfo {
  // Information about which root certs the node trusts.
  bytes trust_root = 1;

  // Information about the node's current TLS certificate: the subject and
  // the public key of its issuer.
  // NOTE(review): the encoding of these byte fields is not visible here.
  bytes cert_issuer_subject = 2;
  bytes cert_issuer_public_key = 3;
}
// NodeCSIInfo represents information about a Node returned by calling the
// NodeGetInfo RPC on the CSI plugin present on the node. There is a separate
// NodeCSIInfo object for each CSI plugin present.
message NodeCSIInfo {
  // PluginName is the name of the CSI plugin.
  string plugin_name = 1;

  // NodeID is the ID of the node as reported by the CSI plugin. It differs
  // from the swarmkit node ID.
  string node_id = 2;

  // MaxVolumesPerNode is the maximum number of volumes that may be published
  // to this node.
  int64 max_volumes_per_node = 3;

  // AccessibleTopology indicates the location of this node in the CSI
  // plugin's topology.
  Topology accessible_topology = 4;
}
// RaftMemberStatus reports the raft status of a cluster member.
message RaftMemberStatus {
  // leader flags the member as the raft leader.
  // NOTE(review): inferred from the field name — confirm.
  bool leader = 1;

  enum Reachability {
    // Unknown indicates that the manager state cannot be resolved.
    UNKNOWN = 0;

    // Unreachable indicates that the node cannot be contacted by other raft
    // cluster members.
    UNREACHABLE = 1;

    // Reachable indicates that the node is healthy and reachable by other
    // members.
    REACHABLE = 2;
  }

  // reachability is the member's reachability as defined above.
  Reachability reachability = 2;

  // message is a free-form string accompanying the status.
  string message = 3;
}
// NodeStatus reports the state of a node together with its observed address.
message NodeStatus {
  // TODO(aluzzardi) These should be using `gogoproto.enumvalue_customname`.
  enum State {
    // Unknown indicates the node state cannot be resolved.
    UNKNOWN = 0;

    // Down indicates the node is down.
    DOWN = 1;

    // Ready indicates the node is ready to accept tasks.
    READY = 2;

    // Disconnected indicates the node is currently trying to find a new
    // manager.
    DISCONNECTED = 3;
  }

  // state is the current state of the node.
  State state = 1;

  // message is a free-form string accompanying the state.
  string message = 2;

  // Addr is the node's IP address as observed by the manager.
  string addr = 3;
}
// Image identifies a docker image.
message Image {
  // reference is a docker image reference. It can include a repository and
  // tag, or be fully qualified with a digest. The format is specified in the
  // distribution/reference package.
  string reference = 1;
}
// Mount describes volume mounts for a container.
//
// The Mount type follows the structure of the mount syscall, including a type,
// source, target. Top-level flags, such as writable, are common to all kinds
// of mounts, where we also provide options that are specific to a type of
// mount. This corresponds to flags and data, respectively, in the syscall.
message Mount {
  enum Type {
    option (gogoproto.goproto_enum_prefix) = false;
    option (gogoproto.enum_customname) = "MountType";

    // Bind mount host dir.
    BIND = 0 [(gogoproto.enumvalue_customname) = "MountTypeBind"];
    // Remote storage volumes.
    VOLUME = 1 [(gogoproto.enumvalue_customname) = "MountTypeVolume"];
    // Mount a tmpfs.
    TMPFS = 2 [(gogoproto.enumvalue_customname) = "MountTypeTmpfs"];
    // Windows named pipes.
    NPIPE = 3 [(gogoproto.enumvalue_customname) = "MountTypeNamedPipe"];
    // CSI volume.
    CLUSTER = 4 [(gogoproto.enumvalue_customname) = "MountTypeCluster"];
  }

  // Type defines the nature of the mount.
  Type type = 1;

  // Source specifies the name of the mount. Depending on the mount type, it
  // may be a volume name or a host path, or it may be ignored.
  //
  // For CSI type mounts, the source is either the name of the volume or the
  // name of the volume group. To specify a volume group, prefix the source
  // with "group:", as in "group:groupname".
  string source = 2;

  // Target path in the container.
  string target = 3;

  // ReadOnly should be set to true if the mount should not be writable.
  bool readonly = 4 [(gogoproto.customname) = "ReadOnly"];

  // Consistency indicates the tolerable level of file system consistency.
  enum Consistency {
    option (gogoproto.goproto_enum_prefix) = false;
    option (gogoproto.enum_customname) = "MountConsistency";

    DEFAULT = 0 [(gogoproto.enumvalue_customname) = "MountConsistencyDefault"];
    CONSISTENT = 1 [(gogoproto.enumvalue_customname) = "MountConsistencyFull"];
    CACHED = 2 [(gogoproto.enumvalue_customname) = "MountConsistencyCached"];
    DELEGATED = 3 [(gogoproto.enumvalue_customname) = "MountConsistencyDelegated"];
  }

  Consistency consistency = 8;

  // BindOptions specifies options that are specific to a bind mount.
  message BindOptions {
    enum Propagation {
      option (gogoproto.goproto_enum_prefix) = false;
      option (gogoproto.enum_customname) = "MountPropagation";

      RPRIVATE = 0 [(gogoproto.enumvalue_customname) = "MountPropagationRPrivate"];
      PRIVATE = 1 [(gogoproto.enumvalue_customname) = "MountPropagationPrivate"];
      RSHARED = 2 [(gogoproto.enumvalue_customname) = "MountPropagationRShared"];
      SHARED = 3 [(gogoproto.enumvalue_customname) = "MountPropagationShared"];
      RSLAVE = 4 [(gogoproto.enumvalue_customname) = "MountPropagationRSlave"];
      SLAVE = 5 [(gogoproto.enumvalue_customname) = "MountPropagationSlave"];
    }

    // Propagation mode of the mount.
    Propagation propagation = 1;

    // Allows a non-recursive bind mount, i.e. mount(2) with "bind" rather
    // than "rbind".
    bool nonrecursive = 2 [(gogoproto.customname) = "NonRecursive"];

    // Create the mount point.
    bool createmountpoint = 3 [(gogoproto.customname) = "CreateMountpoint"];

    // ReadOnlyNonRecursive makes the mount non-recursively read-only, but
    // still leaves the mount recursive (unless NonRecursive is set to true
    // in conjunction).
    bool readonlynonrecursive = 4 [(gogoproto.customname) = "ReadOnlyNonRecursive"];

    // ReadOnlyForceRecursive raises an error if the mount cannot be made
    // recursively read-only.
    bool readonlyforcerecursive = 5 [(gogoproto.customname) = "ReadOnlyForceRecursive"];
  }

  // VolumeOptions contains parameters for mounting the volume.
  message VolumeOptions {
    // nocopy prevents automatic copying of data to the volume with data from
    // target.
    bool nocopy = 1 [(gogoproto.customname) = "NoCopy"];

    // Labels to apply to the volume if it is created.
    map<string, string> labels = 2;

    // DriverConfig specifies the options that may be passed to the driver
    // if the volume is created.
    //
    // If this is empty, no volume will be created if the volume is missing.
    Driver driver_config = 3;

    // Subpath inside the volume to mount.
    string subpath = 4 [(gogoproto.customname) = "Subpath"];
  }

  message TmpfsOptions {
    // Size sets the size of the tmpfs, in bytes.
    //
    // This will be converted to an operating system specific value
    // depending on the host. For example, on linux, it will be converted to
    // use a 'k', 'm' or 'g' syntax. BSD, though not widely supported with
    // docker, uses a straight byte value.
    //
    // Percentages are not supported.
    int64 size_bytes = 1;

    // Mode of the tmpfs upon creation.
    uint32 mode = 2 [(gogoproto.customtype) = "os.FileMode", (gogoproto.nullable) = false];

    // Options passed to the tmpfs mount.
    string options = 3;

    // TODO(stevvooe): There are several more tmpfs flags, specified in the
    // daemon, that are accepted. Only the most basic are added for now.
    //
    // From docker/docker/pkg/mount/flags.go:
    //
    // var validFlags = map[string]bool{
    //   "":          true,
    //   "size":      true, X
    //   "mode":      true, X
    //   "uid":       true,
    //   "gid":       true,
    //   "nr_inodes": true,
    //   "nr_blocks": true,
    //   "mpol":      true,
    // }
    //
    // Some of these may be straightforward to add, but others, such as
    // uid/gid have implications in a clustered system.
  }

  // Depending on type, one of the type-specific option messages below
  // (bind_options, volume_options, tmpfs_options) may be set.

  // BindOptions configures properties of a bind mount type.
  //
  // For mounts of type bind, the source must be an absolute host path.
  BindOptions bind_options = 5;

  // VolumeOptions configures the properties specific to a volume mount type.
  //
  // For mounts of type volume, the source will be used as the volume name.
  VolumeOptions volume_options = 6;

  // TmpfsOptions allows one to set options for mounting a temporary
  // filesystem.
  //
  // The source field will be ignored when using mounts of type tmpfs.
  TmpfsOptions tmpfs_options = 7;

  // TODO(stevvooe): It would be better to use a oneof field above, although
  // the type is enough to make the decision, while being primary to the
  // datastructure.
}
// RestartPolicy specifies the condition under which restarts are attempted,
// plus the delay, attempt limit and evaluation window for those restarts.
message RestartPolicy {
  enum RestartCondition {
    option (gogoproto.goproto_enum_prefix) = false;
    option (gogoproto.enum_customname) = "RestartCondition";

    NONE = 0 [(gogoproto.enumvalue_customname) = "RestartOnNone"];
    ON_FAILURE = 1 [(gogoproto.enumvalue_customname) = "RestartOnFailure"];
    ANY = 2 [(gogoproto.enumvalue_customname) = "RestartOnAny"];
  }

  // condition selects which RestartCondition applies.
  RestartCondition condition = 1;

  // Delay between restart attempts.
  // Note: can't use stdduration because this field needs to be nullable.
  google.protobuf.Duration delay = 2;

  // MaxAttempts is the maximum number of restarts to attempt on an instance
  // before giving up. Ignored if 0.
  uint64 max_attempts = 3;

  // Window is the time window used to evaluate the restart policy. The time
  // window is unbounded if this is 0.
  // Note: can't use stdduration because this field needs to be nullable.
  google.protobuf.Duration window = 4;
}
// UpdateConfig specifies the rate and policy of updates.
// TODO(aluzzardi): Consider making this a oneof with RollingStrategy and LockstepStrategy.
message UpdateConfig {
  // Maximum number of tasks to be updated in one iteration.
  // 0 means unlimited parallelism.
  uint64 parallelism = 1;

  // Amount of time between updates.
  google.protobuf.Duration delay = 2 [(gogoproto.stdduration) = true, (gogoproto.nullable) = false];

  enum FailureAction {
    PAUSE = 0;
    CONTINUE = 1;
    ROLLBACK = 2;
  }

  // FailureAction is the action to take when an update fails.
  FailureAction failure_action = 3;

  // Monitor indicates how long to monitor a task for failure after it is
  // created. If the task fails by ending up in one of the states REJECTED,
  // COMPLETED, or FAILED within Monitor from its creation, this counts as a
  // failure. If it fails after Monitor, it does not count as a failure. If
  // Monitor is unspecified, a default value will be used.
  // Note: can't use stdduration because this field needs to be nullable.
  google.protobuf.Duration monitor = 4;

  // MaxFailureRatio is the fraction of tasks that may fail during an update
  // before the failure action is invoked. Any task created by the current
  // update which ends up in one of the states REJECTED, COMPLETED or FAILED
  // within Monitor from its creation counts as a failure. The number of
  // failures is divided by the number of tasks being updated, and if this
  // fraction is greater than MaxFailureRatio, the failure action is invoked.
  //
  //   * If the failure action is CONTINUE, there is no effect.
  //   * If the failure action is PAUSE, no more tasks will be updated until
  //     another update is started.
  //   * If the failure action is ROLLBACK, the orchestrator will attempt to
  //     roll back to the previous service spec. If the MaxFailureRatio
  //     threshold is hit during the rollback, the rollback will pause.
  float max_failure_ratio = 5;

  // UpdateOrder controls the order of operations when rolling out an updated
  // task. Either the old task is shut down before the new task is started,
  // or the new task is started before the old task is shut down.
  enum UpdateOrder {
    STOP_FIRST = 0;
    START_FIRST = 1;
  }

  UpdateOrder order = 6;
}
// UpdateStatus is the status of an update in progress.
message UpdateStatus {
  enum UpdateState {
    UNKNOWN = 0;
    UPDATING = 1;
    PAUSED = 2;
    COMPLETED = 3;
    ROLLBACK_STARTED = 4;
    // If a rollback fails.
    ROLLBACK_PAUSED = 5;
    ROLLBACK_COMPLETED = 6;
  }

  // State is the state of this update. It indicates whether the update is in
  // progress, completed, paused, rolling back, or finished rolling back.
  UpdateState state = 1;

  // StartedAt is the time at which the update was started.
  // Note: can't use stdtime because this field is nullable.
  google.protobuf.Timestamp started_at = 2;

  // CompletedAt is the time at which the update completed successfully,
  // paused, or finished rolling back.
  // Note: can't use stdtime because this field is nullable.
  google.protobuf.Timestamp completed_at = 3;

  // TODO(aaronl): Consider adding a timestamp showing when the most recent
  // task update took place. Currently, this is nontrivial because each
  // service update kicks off a replacement update, so updating the service
  // object with a timestamp at every step along the rolling update would
  // cause the rolling update to be constantly restarted.

  // Message explains how the update got into its current state. For example,
  // if the update is paused, it will explain what is preventing the update
  // from proceeding (typically the failure of a task to start up when
  // OnFailure is PAUSE).
  string message = 4;
}
// TaskState enumerates the states that a task progresses through within an
// agent. States are designed to be monotonically increasing, such that if two
// states are seen by a task, the greater of the two represents the true state.
// Only the manager creates a NEW task and moves the task to PENDING and ASSIGNED.
// Afterward, the manager must rely on the agent to update the task status
// (pre-run: preparing, ready, starting;
// running;
// end-state: complete, shutdown, failed, rejected)
enum TaskState {
  // TODO(aluzzardi): Move it back into `TaskStatus` because of the naming
  // collisions of enums.
  option (gogoproto.goproto_enum_prefix) = false;
  option (gogoproto.enum_customname) = "TaskState";

  NEW = 0 [(gogoproto.enumvalue_customname) = "TaskStateNew"];

  // Waiting for scheduling decision.
  PENDING = 64 [(gogoproto.enumvalue_customname) = "TaskStatePending"];

  ASSIGNED = 192 [(gogoproto.enumvalue_customname) = "TaskStateAssigned"];

  // Task has been accepted by an agent.
  ACCEPTED = 256 [(gogoproto.enumvalue_customname) = "TaskStateAccepted"];

  PREPARING = 320 [(gogoproto.enumvalue_customname) = "TaskStatePreparing"];

  READY = 384 [(gogoproto.enumvalue_customname) = "TaskStateReady"];

  STARTING = 448 [(gogoproto.enumvalue_customname) = "TaskStateStarting"];

  RUNNING = 512 [(gogoproto.enumvalue_customname) = "TaskStateRunning"];

  // Successful completion of task (not error code, just ran).
  COMPLETE = 576 [(gogoproto.enumvalue_customname) = "TaskStateCompleted"];

  // Orchestrator requested shutdown.
  SHUTDOWN = 640 [(gogoproto.enumvalue_customname) = "TaskStateShutdown"];

  // Task execution failed with error.
  FAILED = 704 [(gogoproto.enumvalue_customname) = "TaskStateFailed"];

  // TaskStateRejected means a task never ran, for instance if something
  // about the environment failed (e.g. setting up a port on that node
  // failed). The task could not be executed here.
  REJECTED = 768 [(gogoproto.enumvalue_customname) = "TaskStateRejected"];

  // TaskStateRemove is used to correctly handle service deletions and scale
  // downs. This allows us to keep track of tasks that have been marked for
  // deletion, but can't yet be removed because the agent is in the process
  // of shutting them down. Once the agent has shut down tasks with desired
  // state REMOVE, the task reaper is responsible for removing them.
  REMOVE = 800 [(gogoproto.enumvalue_customname) = "TaskStateRemove"];

  // TaskStateOrphaned is used to free up resources associated with service
  // tasks on unresponsive nodes without having to delete those tasks. This
  // state is directly assigned to the task by the orchestrator.
  ORPHANED = 832 [(gogoproto.enumvalue_customname) = "TaskStateOrphaned"];

  // NOTE(stevvooe): The state of a task is actually a lamport clock, in that
  // given two observations, the greater of the two can be considered
  // correct. To enforce this, we only allow tasks to proceed to a greater
  // state.
  //
  // A byproduct of this design decision is that we must also maintain this
  // invariant in the protobuf enum values, such that when comparing two
  // values, the one with the greater value is also the greater state.
  //
  // Because we may want to add intervening states at a later date, we've
  // left 64 spaces between each one. This should allow us to make 5 or 6
  // insertions between each state if we find that we made a mistake and need
  // another state. (The gaps are not uniformly 64 in the values above:
  // PENDING to ASSIGNED is 128, while REJECTED to REMOVE and REMOVE to
  // ORPHANED are 32.)
  //
  // Remove this message when the states are deemed perfect.
}
// Container specific status.
message ContainerStatus {
  // container_id is the ID of the container.
  string container_id = 1;

  // PID recorded for the container.
  // NOTE(review): presumably the PID of the container's main process —
  // confirm against the code that fills this in.
  int32 pid = 2 [(gogoproto.customname) = "PID"];

  // exit_code is the exit code recorded for the container.
  int32 exit_code = 3;
}
// PortStatus specifies the actual allocated runtime state of a list
// of port configs.
message PortStatus {
  // ports is the list of port configs carrying the allocated runtime state.
  repeated PortConfig ports = 1;
}
// TaskStatus reports the state of a task along with explanatory messages,
// runtime-specific status and port allocation.
message TaskStatus {
  // Timestamp of this status.
  // Note: can't use stdtime because this field is nullable.
  google.protobuf.Timestamp timestamp = 1;

  // State expresses the current state of the task.
  TaskState state = 2;

  // Message reports a message for the task status. This should provide a
  // human readable message that can point to how the task actually arrived
  // at a current state.
  //
  // As a convention, we place a small message here that led to the current
  // state. For example, if the task is in ready, because it was prepared,
  // we'd place "prepared" in this field. If we skipped preparation because
  // the task is prepared, we would put "already prepared" in this field.
  string message = 3;

  // Err is set if the task is in an error state, or is unable to progress
  // from an earlier state because a precondition is unsatisfied.
  //
  // The following states should report a companion error:
  //
  //   FAILED, REJECTED
  //
  // In general, messages that should be surfaced to users belong in the Err
  // field, and notes on routine state transitions belong in Message.
  //
  // TODO(stevvooe) Integrate this field with the error interface.
  string err = 4;

  // Container status contains container specific status information.
  oneof runtime_status {
    ContainerStatus container = 5;
  }

  // HostPorts provides a list of ports allocated at the host level.
  PortStatus port_status = 6;

  // AppliedBy gives the node ID of the manager that applied this task status
  // update to the Task object.
  string applied_by = 7;

  // AppliedAt gives a timestamp of when this status update was applied to
  // the Task object.
  // Note: can't use stdtime because this field is nullable.
  google.protobuf.Timestamp applied_at = 8;
}
// NetworkAttachmentConfig specifies how a service should be attached to a particular network.
//
// For now, this is a simple struct, but this can include future information
// instructing Swarm on how this service should work on the particular
// network.
message NetworkAttachmentConfig {
  // Target specifies the target network for attachment. This value must be
  // a network ID.
  string target = 1;

  // Aliases specifies a list of discoverable alternate names for the service
  // on this Target.
  repeated string aliases = 2;

  // Addresses specifies a list of preferred ipv4 and ipv6 addresses. If
  // these addresses are not available then the attachment might fail.
  repeated string addresses = 3;

  // DriverAttachmentOpts is a map of driver attachment options for the
  // network target.
  map<string, string> driver_attachment_opts = 4;
}
// IPAMConfig specifies parameters for IP Address Management.
message IPAMConfig {
  // TODO(stevvooe): It may make more sense to manage IPAM and network
  // definitions separately. This will allow multiple networks to share IPAM
  // instances. For now, we will follow the conventions of libnetwork and
  // specify this as part of the network specification.

  // AddressFamily specifies the network address family that this IPAMConfig
  // belongs to.
  enum AddressFamily {
    // Satisfy proto3.
    UNKNOWN = 0;
    IPV4 = 4;
    IPV6 = 6;
  }

  AddressFamily family = 1;

  // Subnet defines a network as a CIDR address (i.e. network and mask,
  // 192.168.0.1/24).
  string subnet = 2;

  // Range defines the portion of the subnet to allocate to tasks. This is
  // defined as a subnet within the primary subnet.
  string range = 3;

  // Gateway address within the subnet.
  string gateway = 4;

  // Reserved is a list of addresses from the master pool that should *not*
  // be allocated. These addresses may have already been allocated or may be
  // reserved for another allocation manager.
  map<string, string> reserved = 5;
}
// PortConfig specifies an exposed port which can be
// addressed using the given name. This can be later queried
// using a service discovery api or a DNS SRV query. The node
// port specifies a port that can be used to address this
// service external to the cluster by sending a connection
// request to this port to any node on the cluster.
message PortConfig {
  enum Protocol {
    option (gogoproto.goproto_enum_prefix) = false;

    TCP = 0 [(gogoproto.enumvalue_customname) = "ProtocolTCP"];
    UDP = 1 [(gogoproto.enumvalue_customname) = "ProtocolUDP"];
    SCTP = 2 [(gogoproto.enumvalue_customname) = "ProtocolSCTP"];
  }

  // PublishMode controls how ports are published on the swarm.
  enum PublishMode {
    option (gogoproto.enum_customname) = "PublishMode";
    option (gogoproto.goproto_enum_prefix) = false;

    // PublishModeIngress exposes the port across the cluster on all nodes.
    INGRESS = 0 [(gogoproto.enumvalue_customname) = "PublishModeIngress"];

    // PublishModeHost exposes the port on just the target host. If the
    // published port is undefined, an ephemeral port will be allocated. If
    // the published port is defined, the node will attempt to allocate it,
    // erroring the task if it fails.
    HOST = 1 [(gogoproto.enumvalue_customname) = "PublishModeHost"];
  }

  // Name for the port. If provided, the port information can be queried
  // using the name, as in a DNS SRV query.
  string name = 1;

  // Protocol for the port which is exposed.
  Protocol protocol = 2;

  // The port which the application is exposing and is bound to.
  uint32 target_port = 3;

  // PublishedPort specifies the port on which the service is exposed. If
  // specified, the port must be within the available range. If not specified
  // (value is zero), an available port is automatically assigned.
  uint32 published_port = 4;

  // PublishMode controls how the port is published.
  PublishMode publish_mode = 5;
}
// Driver is a generic driver type to be used throughout the API. For now, a
// driver is simply a name and set of options. The field contents depend on the
// target use case and driver application. For example, a network driver may
// have different rules than a volume driver.
message Driver {
  // name is the name of the driver.
  string name = 1;

  // options is the set of options for the driver; their interpretation
  // depends on the target use case and driver application.
  map<string, string> options = 2;
}
// IPAMOptions pairs an IPAM driver with a list of IPAM configurations.
message IPAMOptions {
  // Field number 2 is not assigned to any field in this message. It is
  // reserved so that a future field cannot pick it up by accident.
  // NOTE(review): confirm whether 2 was ever assigned in an earlier revision.
  reserved 2;

  // driver is the IPAM driver.
  Driver driver = 1;

  // configs is the list of IPAM configurations.
  repeated IPAMConfig configs = 3;
}
// Peer should be used anywhere where we are describing a remote peer.
message Peer {
  // node_id is the node ID of the remote peer.
  string node_id = 1;

  // addr is the address of the remote peer.
  // NOTE(review): the address format is not visible here — confirm.
  string addr = 2;
}
// WeightedPeer should be used anywhere where we are describing a remote peer
// with a weight.
message WeightedPeer {
  // peer is the remote peer being described.
  Peer peer = 1;

  // weight is the weight associated with the peer.
  int64 weight = 2;
}
// IssuanceStatus reports the state of a certificate issuance.
message IssuanceStatus {
  enum State {
    option (gogoproto.goproto_enum_prefix) = false;

    UNKNOWN = 0 [(gogoproto.enumvalue_customname) = "IssuanceStateUnknown"];

    // A new certificate should be issued.
    RENEW = 1 [(gogoproto.enumvalue_customname) = "IssuanceStateRenew"];

    // Certificate is pending acceptance.
    PENDING = 2 [(gogoproto.enumvalue_customname) = "IssuanceStatePending"];

    // Successful completion of certificate issuance.
    ISSUED = 3 [(gogoproto.enumvalue_customname) = "IssuanceStateIssued"];

    // Certificate issuance failed.
    FAILED = 4 [(gogoproto.enumvalue_customname) = "IssuanceStateFailed"];

    // Signals workers to renew their certificate. From the CA's perspective
    // this is equivalent to IssuanceStateIssued: a noop.
    ROTATE = 5 [(gogoproto.enumvalue_customname) = "IssuanceStateRotate"];
  }

  // state is the current issuance state.
  State state = 1;

  // Err is set if the certificate issuance is in an error state.
  // The following states should report a companion error:
  //   FAILED
  string err = 2;
}
// AcceptancePolicy holds a list of per-role admission policies.
message AcceptancePolicy {
  message RoleAdmissionPolicy {
    message Secret {
      // The actual content (possibly hashed).
      bytes data = 1;

      // The type of hash we are using, or "plaintext".
      string alg = 2;
    }

    // role is the node role this policy applies to.
    NodeRole role = 1;

    // Autoaccept controls which roles' certificates are automatically issued
    // without administrator intervention.
    bool autoaccept = 2;

    // Secret represents a user-provided string that is necessary for new
    // nodes to join the cluster.
    Secret secret = 3;
  }

  // policies is the list of role admission policies.
  repeated RoleAdmissionPolicy policies = 1;
}
// ExternalCA describes an external certificate authority: the protocol it
// speaks, where to reach it, extra options, and its root CA certificate.
message ExternalCA {
  enum CAProtocol {
    CFSSL = 0 [(gogoproto.enumvalue_customname) = "CAProtocolCFSSL"];
  }

  // Protocol is the protocol used by this external CA.
  CAProtocol protocol = 1;

  // URL is the URL where the external CA can be reached.
  string url = 2 [(gogoproto.customname) = "URL"];

  // Options is a set of additional key/value pairs whose interpretation
  // depends on the specified CA type.
  map<string, string> options = 3;

  // CACert specifies which root CA is used by this external CA.
  bytes ca_cert = 4 [(gogoproto.customname) = "CACert"];
}
// CAConfig holds the certificate authority settings: certificate expiry,
// external CAs, the desired signing CA, and the forced-rotation counter.
message CAConfig {
  // NodeCertExpiry is the duration certificates should be issued for.
  // Note: can't use stdduration because this field needs to be nullable.
  google.protobuf.Duration node_cert_expiry = 1;

  // ExternalCAs is a list of CAs to which a manager node will make
  // certificate signing requests for node certificates.
  repeated ExternalCA external_cas = 2 [(gogoproto.customname) = "ExternalCAs"];

  // SigningCACert is the desired CA certificate to be used as the root and
  // signing CA for the swarm. If not provided, indicates that we are either
  // happy with the current configuration, or (together with a bump in the
  // ForceRotate value) that we want a certificate and key generated for us.
  bytes signing_ca_cert = 3 [(gogoproto.customname) = "SigningCACert"];

  // SigningCAKey is the desired private key, matching the signing CA cert,
  // to be used to sign certificates for the swarm.
  bytes signing_ca_key = 4 [(gogoproto.customname) = "SigningCAKey"];

  // ForceRotate is a counter that triggers a root CA rotation even if no
  // relevant parameters have been changed in the spec. This will force the
  // manager to generate a new certificate and key, if none have been
  // provided.
  uint64 force_rotate = 5;
}
// OrchestrationConfig defines cluster-level orchestration settings.
message OrchestrationConfig {
  // TaskHistoryRetentionLimit is the number of historic tasks to keep per
  // instance or node. If negative, completed or failed tasks are never
  // removed.
  int64 task_history_retention_limit = 1;
}
// TaskDefaults specifies default values for task creation.
message TaskDefaults {
  // LogDriver specifies the log driver to use for the cluster if not
  // specified for each task.
  //
  // If this is changed, only new tasks will pick up the new log driver.
  // Existing tasks will continue to use the previous default until
  // rescheduled.
  Driver log_driver = 1;
}
// DispatcherConfig defines cluster-level dispatcher settings.
message DispatcherConfig {
  // HeartbeatPeriod defines how often an agent should send heartbeats to
  // the dispatcher.
  // Note: can't use stdduration because this field needs to be nullable.
  google.protobuf.Duration heartbeat_period = 1;
}
// RaftConfig defines raft settings for the cluster.
message RaftConfig {
  // SnapshotInterval is the number of log entries between snapshots.
  uint64 snapshot_interval = 1;

  // KeepOldSnapshots is the number of snapshots to keep beyond the current
  // snapshot.
  uint64 keep_old_snapshots = 2;

  // LogEntriesForSlowFollowers is the number of log entries to keep around
  // to sync up slow followers after a snapshot is created.
  uint64 log_entries_for_slow_followers = 3;

  // HeartbeatTick defines the amount of ticks (in seconds) between each
  // heartbeat message sent to other members for health-check.
  uint32 heartbeat_tick = 4;

  // ElectionTick defines the amount of ticks (in seconds) needed without a
  // leader to trigger a new election.
  uint32 election_tick = 5;
}
// EncryptionConfig holds the settings for encryption at rest.
message EncryptionConfig {
  // AutoLockManagers specifies whether or not managers' TLS keys and raft
  // data should be encrypted at rest in such a way that they must be
  // unlocked before the manager node starts up again.
  bool auto_lock_managers = 1;
}
// SpreadOver names the label descriptor used for spreading.
message SpreadOver {
  // Label descriptor, such as engine.labels.az.
  string spread_descriptor = 1;

  // TODO: support node information beyond engine and node labels
  // TODO: in the future, add a map that provides weights for weighted
  // spreading.
}
// PlacementPreference is a single placement preference. SpreadOver is the
// only kind defined in the oneof.
message PlacementPreference {
  oneof Preference {
    // spread is a SpreadOver preference.
    SpreadOver spread = 1;
  }
}
// Placement specifies task distribution constraints.
message Placement {
  // Constraints specifies a set of requirements a node should meet for a
  // task.
  repeated string constraints = 1;

  // Preferences provide a way to make the scheduler aware of factors such
  // as topology. They are provided in order from highest to lowest
  // precedence.
  repeated PlacementPreference preferences = 2;

  // Platforms stores all the platforms that the image can run on. This field
  // is used in the platform filter for scheduling. If empty, the platform
  // filter is off, meaning there are no scheduling restrictions.
  repeated Platform platforms = 3;

  // MaxReplicas specifies the limit for the maximum number of replicas
  // running on one node.
  uint64 max_replicas = 4;
}
// JoinTokens contains the join tokens for workers and managers.
message JoinTokens {
// Worker is the join token workers may use to join the swarm.
string worker = 1;
// Manager is the join token managers may use to join the swarm.
string manager = 2;
}
// RootCA holds the root CA's private key, certificate and certificate
// digest, the join tokens, and the state of any root rotation in progress.
message RootCA {
// CAKey is the root CA private key.
bytes ca_key = 1 [(gogoproto.customname) = "CAKey"];
// CACert is the root CA certificate.
bytes ca_cert = 2 [(gogoproto.customname) = "CACert"];
// CACertHash is the digest of the CA Certificate.
string ca_cert_hash = 3 [(gogoproto.customname) = "CACertHash"];
// JoinTokens contains the join tokens for workers and managers.
JoinTokens join_tokens = 4 [(gogoproto.nullable) = false];
// RootRotation contains the new root cert and key we want to rotate to.
// If this is nil, we are not in the middle of a root rotation.
RootRotation root_rotation = 5;
// LastForcedRotation matches the Cluster Spec's CAConfig's ForceRotation counter.
// It indicates when the current CA cert and key were generated (or updated).
uint64 last_forced_rotation = 6;
}
// NodeRole enumerates the roles a node can hold: worker or manager.
// The gogoproto options customize the generated Go identifiers: the enum
// type is named NodeRole, the default enum-name prefix is turned off, and
// the values are named NodeRoleWorker and NodeRoleManager.
enum NodeRole {
option (gogoproto.enum_customname) = "NodeRole";
option (gogoproto.goproto_enum_prefix) = false;
// WORKER is the zero value and therefore the default role.
WORKER = 0 [(gogoproto.enumvalue_customname) = "NodeRoleWorker"];
// MANAGER is the manager role.
MANAGER = 1 [(gogoproto.enumvalue_customname) = "NodeRoleManager"];
}
// Certificate groups a node's role, CSR, issuance status and certificate.
message Certificate {
// Role is the node role associated with this certificate.
NodeRole role = 1;
// CSR holds the raw CSR bytes.
// NOTE(review): presumably a certificate signing request; the encoding
// (e.g. PEM vs DER) is not stated here — confirm.
bytes csr = 2 [(gogoproto.customname) = "CSR"];
// Status is the issuance status of the certificate. It is marked
// non-nullable for the generated Go code.
IssuanceStatus status = 3 [(gogoproto.nullable) = false];
// Certificate holds the raw certificate bytes.
bytes certificate = 4;
// CN represents the node ID.
string cn = 5 [(gogoproto.customname) = "CN"];
}
// EncryptionKey is a symmetric key used to encrypt inter-agent communication.
message EncryptionKey {
// Agent subsystem the key is intended for. Example:
// networking:gossip
string subsystem = 1;
// Encryption algorithm that can be implemented using this key.
enum Algorithm {
option (gogoproto.goproto_enum_prefix) = false;
// AES_128_GCM is the zero value, and currently the only algorithm.
AES_128_GCM = 0;
}
// Algorithm is the encryption algorithm this key is meant for.
Algorithm algorithm = 2;
// Key is the key material.
bytes key = 3;
// Time stamp from the lamport clock of the key allocator to
// identify the relative age of the key.
uint64 lamport_time = 4;
}
// ManagerStatus provides information about the state of a manager in the cluster.
message ManagerStatus {
// RaftID specifies the internal ID used by the manager in a raft context.
// It can never be modified and is used only for informational purposes.
uint64 raft_id = 1;
// Addr is the address advertised to raft.
string addr = 2;
// Leader is set to true if this node is the raft leader.
bool leader = 3;
// Reachability specifies whether this node is reachable.
RaftMemberStatus.Reachability reachability = 4;
}
// FileTarget represents a specific target that is backed by a file.
message FileTarget {
// Name represents the final filename in the filesystem.
string name = 1;
// UID represents the file UID.
string uid = 2 [(gogoproto.customname) = "UID"];
// GID represents the file GID.
string gid = 3 [(gogoproto.customname) = "GID"];
// Mode represents the FileMode of the file. It travels as a uint32 on the
// wire; the gogoproto options make the generated Go field a non-nullable
// os.FileMode.
uint32 mode = 4 [(gogoproto.customtype) = "os.FileMode", (gogoproto.nullable) = false];
}
// RuntimeTarget represents that this secret is _not_ mounted into the
// container, but is used for some other purpose by the container runtime.
//
// Currently, RuntimeTarget has no fields; it's just a placeholder.
//
// NOTE(review): in the part of this file reviewed, RuntimeTarget is only a
// member of ConfigReference.target, not of SecretReference.target, so
// "secret" above may really mean "config" — confirm.
message RuntimeTarget {}
// SecretReference is the linkage between a service and a secret that it uses.
message SecretReference {
// SecretID represents the ID of the specific Secret that we're
// referencing. This identifier exists so that SecretReferences don't leak
// any information about the secret contents.
string secret_id = 1;
// SecretName is the name of the secret that this references, but this is
// just provided for lookup/display purposes. The secret in the reference
// will be identified by its ID.
string secret_name = 2;
// Target specifies how this secret should be exposed to the task.
oneof target {
// File exposes the secret as a file described by a FileTarget. It is
// currently the only target kind for secrets.
FileTarget file = 3;
}
}
// ConfigReference is the linkage between a service and a config that it uses.
message ConfigReference {
// ConfigID represents the ID of the specific Config that we're
// referencing.
string config_id = 1;
// ConfigName is the name of the config that this references, but this is
// just provided for lookup/display purposes. The config in the reference
// will be identified by its ID.
string config_name = 2;
// Target specifies how this config should be exposed to the task.
oneof target {
// File exposes the config as a file described by a FileTarget.
FileTarget file = 3;
// Runtime selects the RuntimeTarget placeholder instead of a file.
RuntimeTarget runtime = 4;
}
}
// BlacklistedCertificate is a record for a blacklisted certificate. It does not
// contain the certificate's CN, because these records are indexed by CN.
message BlacklistedCertificate {
// Expiry is the latest known expiration time of a certificate that
// was issued for the given CN.
// Note: can't use stdtime because this field is nullable, so it is
// declared as a plain google.protobuf.Timestamp message field.
google.protobuf.Timestamp expiry = 1;
}
// HealthConfig holds configuration settings for the HEALTHCHECK feature.
message HealthConfig {
// Test is the test to perform to check that the container is healthy.
// An empty slice means to inherit the default.
// The options are:
// {} : inherit healthcheck
// {"NONE"} : disable healthcheck
// {"CMD", args...} : exec arguments directly
// {"CMD-SHELL", command} : run command with system's default shell
repeated string test = 1;
// Interval is the time to wait between checks. Zero means inherit.
// Note: can't use stdduration because this field needs to be nullable.
google.protobuf.Duration interval = 2;
// Timeout is the time to wait before considering the check to have hung.
// Zero means inherit.
// Note: can't use stdduration because this field needs to be nullable.
google.protobuf.Duration timeout = 3;
// Retries is the number of consecutive failures needed to consider a
// container as unhealthy. Zero means inherit.
int32 retries = 4;
// StartPeriod is the period for container initialization during
// which health check failures will not count towards the maximum
// number of retries.
google.protobuf.Duration start_period = 5;
// StartInterval is the time to wait between checks during the start period.
// Zero means inherit.
// Note: can't use stdduration because this field needs to be nullable.
google.protobuf.Duration start_interval = 6;
}
// MaybeEncryptedRecord is a data record tagged with the algorithm, if any,
// that was used to encrypt it.
message MaybeEncryptedRecord {
// Algorithm enumerates the encryption algorithms a record can be tagged
// with. The gogoproto options set the generated Go names of the values.
enum Algorithm {
// NONE (Go name NotEncrypted) is the zero value and therefore the default.
NONE = 0 [(gogoproto.enumvalue_customname) = "NotEncrypted"];
SECRETBOX_SALSA20_POLY1305 = 1 [(gogoproto.enumvalue_customname) = "NACLSecretboxSalsa20Poly1305"];
FERNET_AES_128_CBC = 2 [(gogoproto.enumvalue_customname) = "FernetAES128CBC"];
}
// Algorithm identifies how Data is encrypted.
Algorithm algorithm = 1;
// Data is the record payload.
bytes data = 2;
// Nonce is the nonce stored alongside Data.
// NOTE(review): presumably only meaningful when Algorithm is not NONE —
// confirm against the encrypter/decrypter implementations.
bytes nonce = 3;
}
// RootRotation holds the CA certificate and key involved in a root rotation,
// together with the cross-signed form of that certificate.
message RootRotation {
// CACert is the CA certificate of the rotation.
bytes ca_cert = 1 [(gogoproto.customname) = "CACert"];
// CAKey is the CA key of the rotation.
bytes ca_key = 2 [(gogoproto.customname) = "CAKey"];
// CrossSignedCACert is the CACert that has been cross-signed by the
// previous root.
bytes cross_signed_ca_cert = 3 [(gogoproto.customname) = "CrossSignedCACert"];
}
// Privileges specifies security configuration/permissions.
message Privileges {
// CredentialSpec for managed service account (Windows only).
// At most one member of the source oneof can be set.
message CredentialSpec {
oneof source {
// File names a file source for the CredentialSpec.
string file = 1;
// Registry names a registry source for the CredentialSpec.
string registry = 2;
// Config represents a Config ID from which to get the CredentialSpec.
// The Config MUST be included in the ConfigReferences with a RuntimeTarget.
// NOTE(review): this comment previously said "SecretReferences", but
// RuntimeTarget is only a member of ConfigReference.target, so
// ConfigReferences is assumed to be what was meant — confirm.
string config = 3;
}
}
// CredentialSpec is the credential spec to apply, if any.
CredentialSpec credential_spec = 1;
// SELinuxContext contains the SELinux labels for the container.
message SELinuxContext {
// Disable is a boolean switch for SELinux handling.
// NOTE(review): presumably true turns SELinux labeling off for the
// container — confirm against the executor.
bool disable = 1;
// User, Role, Type and Level are the components of the SELinux label.
string user = 2;
string role = 3;
string type = 4;
string level = 5;
}
// SELinuxContext is the SELinux context to apply, if any.
SELinuxContext selinux_context = 2 [(gogoproto.customname) = "SELinuxContext"];
// SeccompOpts contains options for configuring seccomp profiles on the
// container. See https://docs.docker.com/engine/security/seccomp/ for more
// information.
message SeccompOpts {
// SeccompMode selects which seccomp profile applies. DEFAULT is the
// zero value.
enum SeccompMode {
DEFAULT = 0;
UNCONFINED = 1;
CUSTOM = 2;
}
// Mode is the selected seccomp mode.
SeccompMode mode = 1;
// Profile contains the json definition of the seccomp profile to use,
// if Mode is set to custom.
bytes profile = 2;
}
// Seccomp holds the seccomp options, if any.
SeccompOpts seccomp = 3;
// AppArmorOpts contains options for configuring AppArmor profiles on the
// container. Currently, custom profiles are not supported. See
// https://docs.docker.com/engine/security/apparmor/ for more information.
message AppArmorOpts {
// AppArmorMode selects the AppArmor behaviour. DEFAULT is the zero value.
enum AppArmorMode {
DEFAULT = 0;
DISABLED = 1;
}
// Mode is the selected AppArmor mode.
AppArmorMode mode = 1;
}
// AppArmor holds the AppArmor options, if any.
AppArmorOpts apparmor = 4;
// NoNewPrivileges, if set to true, disables the container from gaining new
// privileges. See https://docs.kernel.org/userspace-api/no_new_privs.html
// for details.
bool no_new_privileges = 5;
}
// JobStatus indicates the status of a Service that is in one of the Job modes.
message JobStatus {
// JobIteration is the count of how many times the Job has been executed,
// successfully or otherwise. "Executed" refers to the job as a whole being
// started, not to the individual Tasks being launched. This is used to
// disambiguate which Tasks belong to which iteration of a Job.
// The count is carried in the Index of a Version message.
Version job_iteration = 1 [(gogoproto.nullable) = false];
// LastExecution is the time that the job was last executed. This is set by
// the orchestrator in the same transaction that JobIteration is incremented.
// While time is a fungible concept in distributed systems like Swarmkit,
// this value gives us a best-effort attempt to prevent weird behavior like
// newly added nodes executing long-forgotten jobs.
google.protobuf.Timestamp last_execution = 2;
}
// VolumeAccessMode is the access mode of the volume, and is used to determine
// the CSI AccessMode value, as well as the volume access type (block vs
// mount). In this way, it is more similar to the CSI VolumeCapability message.
//
// This defines how and where a volume can be accessed by more than
// one Task, but does not imply anything about the accessible topology of the
// volume.
//
// For analogy, a flash drive can be used on many computers, but only one of
// them at a time, and so would have a scope of "Single". But, it can be used
// by any number of programs simultaneously, so would have a sharing of "All".
message VolumeAccessMode {
// Scope enumerates the possible volume access scopes.
enum Scope {
option (gogoproto.goproto_enum_prefix) = false;
// VolumeScopeSingleNode indicates that only one node at a time may have
// access to the volume.
SINGLE_NODE = 0 [(gogoproto.enumvalue_customname) = "VolumeScopeSingleNode"];
// VolumeScopeMultiNode indicates that multiple nodes may access the volume
// at the same time.
MULTI_NODE = 1 [(gogoproto.enumvalue_customname) = "VolumeScopeMultiNode"];
}
// Sharing enumerates the possible volume sharing modes.
enum Sharing {
option (gogoproto.goproto_enum_prefix) = false;
// VolumeSharingNone indicates that the volume may only be used by a single
// Task at any given time.
NONE = 0 [(gogoproto.enumvalue_customname) = "VolumeSharingNone"];
// VolumeSharingReadOnly indicates that the volume may be accessed by
// multiple Tasks, but all Tasks only have read access.
READ_ONLY = 1 [(gogoproto.enumvalue_customname) = "VolumeSharingReadOnly"];
// VolumeSharingOneWriter indicates that the Volume may be accessed by
// multiple Tasks, but only one Task may have write permission for the
// Volume.
ONE_WRITER = 2 [(gogoproto.enumvalue_customname) = "VolumeSharingOneWriter"];
// VolumeSharingAll indicates that any number of Tasks may have read and
// write access to the volume.
ALL = 3 [(gogoproto.enumvalue_customname) = "VolumeSharingAll"];
}
// BlockVolume indicates the volume will be accessed with the block device
// API.
message BlockVolume {
// intentionally empty
}
// MountVolume indicates the volume will be accessed with the filesystem API.
message MountVolume {
// FsType is the filesystem type. This field is optional, and an empty
// string is equal to an unspecified value.
string fs_type = 1;
// MountFlags indicates mount options to be used for the volume. This
// field is optional, and may contain sensitive data.
repeated string mount_flags = 2;
}
// Scope defines on how many nodes this volume can be accessed
// simultaneously. If unset, will default to the zero-value of SINGLE_NODE.
Scope scope = 1;
// Sharing defines how many tasks can use this volume at the same time, and
// in what way. If unset, will default to the zero-value of NONE.
Sharing sharing = 2;
// AccessType defines the access type of the volume. Unlike Sharing and
// Scope, Swarmkit itself doesn't define either of these as a default, but
// the upstream is free to do so. However, one of these MUST be set.
oneof access_type {
// Block selects access through the block device API.
BlockVolume block = 3;
// Mount selects access through the filesystem API.
MountVolume mount = 4;
}
}
// VolumeSecret indicates a secret value that must be passed to CSI plugin
// operations.
message VolumeSecret {
// Key represents the key that will be passed as a controller secret to the
// CSI plugin.
string key = 1;
// Secret represents the swarmkit Secret object from which to read data to
// use as the value to pass to the CSI plugin. This can be either a secret
// name or a secret ID.
//
// TODO(dperny): should this be a SecretReference instead?
string secret = 2;
}
// VolumePublishStatus contains information about the volume's publishing to a
// specific node.
//
// Publishing or unpublishing a volume to a node is a two-step process.
//
// When a Volume is needed on a Node, a VolumePublishStatus with state
// PendingPublish is added. This indicates that the volume should be published,
// but the RPCs have not been executed.
//
// Then, afterward, ControllerPublishVolume is called for the Volume, and the
// State is changed to Published, indicating that the call was a success.
//
// When a Volume is no longer needed, the process is similar, with the State
// being changed to PendingUnpublish. When ControllerUnpublishVolume succeeds,
// the PublishStatus for that Node is simply removed.
//
// Without this two-step process, the following could happen:
//
// 1. ControllerPublishVolume is called and the Volume is successfully
// published.
// 2. A crash or leadership change disrupts the cluster before
// the Volume with the updated VolumePublishStatus can be added to the
// store.
// 3. The Task that required the Volume to be published is deleted.
//
// In this case, the Volume would be published to the Node, but Swarm would be
// unaware of this, and would additionally be unaware that the Volume _should_
// be published to the Node.
//
// By first committing our intention to publish a Volume, we guarantee that the
// Volume itself is sufficient to know which Nodes it may have been published
// to.
message VolumePublishStatus {
// State is the state of the volume in the publish/unpublish
// lifecycle, on a particular node.
enum State {
// PendingPublish indicates that the volume should be published on this
// node, but the call to ControllerPublishVolume has not been
// successfully completed yet and the result recorded by swarmkit.
// As the zero value, this is the default state.
PENDING_PUBLISH = 0;
// Published means the volume is published successfully to the node.
PUBLISHED = 1;
// PendingNodeUnpublish indicates that the Volume should be unpublished
// on the Node, and we're waiting for confirmation that it has done so.
// After the Node has confirmed that the Volume has been unpublished,
// the state will move to PendingUnpublish.
PENDING_NODE_UNPUBLISH = 2;
// PendingUnpublish means the volume is published to the node, and
// needs to not be, but the call to ControllerUnpublishVolume has not
// verifiably succeeded yet. There is no Unpublished state, because
// after the volume has been verifiably unpublished, the
// VolumePublishStatus for the node is removed.
PENDING_UNPUBLISH = 3;
}
// NodeID is the swarm (not CSI plugin) node ID that this volume is
// published to.
string node_id = 1;
// State is the publish state of the volume.
State state = 2;
// PublishContext is the same PublishContext returned by a call to
// ControllerPublishVolume.
map<string, string> publish_context = 3;
// Message is a human-readable message explaining the state of the volume.
// It exists to convey the current situation with the volume to the user,
// allowing, for example, the user to see error messages why a volume might
// not be published yet.
// NOTE(review): field number 4 is skipped in this message. If it belonged
// to a removed field it should be declared `reserved` so it cannot be
// reused — confirm against the file's history.
string message = 5;
}
// VolumeInfo contains information about the volume originating from the CSI
// plugin.
message VolumeInfo {
// CapacityBytes is the capacity of this volume in bytes. A value of 0
// indicates that the capacity is unknown.
int64 capacity_bytes = 1;
// VolumeContext includes fields that are opaque to Swarmkit.
map<string, string> volume_context = 2;
// VolumeID is the ID of the volume as reported by the CSI plugin.
// Information about the volume is not cached in swarmkit's object store;
// instead, it is retrieved on-demand as needed. If the VolumeID field is an
// empty string, and the plugin advertises CREATE_DELETE_VOLUME capability,
// then Swarmkit has not yet called CreateVolume.
string volume_id = 3;
// AccessibleTopology is the topology this volume is actually accessible
// from.
repeated Topology accessible_topology = 4;
}
// CapacityRange describes the minimum and maximum capacity a volume should be
// created with. Both bounds are expressed in bytes.
message CapacityRange {
// RequiredBytes specifies that a volume must be at least this big. The value
// of 0 indicates an unspecified minimum. Must not be negative.
int64 required_bytes = 1;
// LimitBytes specifies that a volume must not be bigger than this. The value
// of 0 indicates an unspecified maximum. Must not be negative.
int64 limit_bytes = 2;
}
// VolumeAssignment contains the information needed by a Node to use a CSI
// volume. This includes the information needed to Stage and Publish the
// volume on the node, but never the full Volume object.
message VolumeAssignment {
// ID is the swarmkit ID for the volume. This is used by swarmkit components
// to identify the volume.
string id = 1;
// VolumeID is the CSI volume ID as returned from CreateVolume. This is used
// by the CSI driver to identify the volume.
string volume_id = 2;
// Driver is the CSI Driver that this volume is managed by.
Driver driver = 3;
// VolumeContext is a map returned from the CSI Controller service when a
// Volume is created. It is optional for the driver to provide, but if it is
// provided, it must be passed to subsequent calls.
map<string,string> volume_context = 4;
// PublishContext is a map returned from the Controller service when
// ControllerPublishVolume is called. Again, it is optional, but if provided,
// must be passed.
map<string,string> publish_context = 5;
// AccessMode specifies the access mode of the volume.
VolumeAccessMode access_mode = 6;
// Secrets is the set of secrets required by the CSI plugin. These refer to
// swarmkit Secrets that will be distributed separately to the node.
repeated VolumeSecret secrets = 7;
}
// VolumeAttachment is the information associating a Volume with a Task.
message VolumeAttachment {
// ID is the swarmkit ID of the volume assigned to this task, not the CSI
// volume ID.
string id = 1;
// Source indicates the Mount source that this volume is assigned for.
string source = 2;
// Target indicates the Mount target that this volume is assigned for.
string target = 3;
}
// The types below are copied from the CSI spec rather than imported, because
// the CSI protos and the swarmkit protos are difficult to make compatible and
// importing the CSI protos directly is difficult.
// TopologyRequirement expresses the user's requirements for a volume's
// accessible topology.
message TopologyRequirement {
// Specifies the list of topologies the provisioned volume MUST be
// accessible from.
// This field is OPTIONAL. If TopologyRequirement is specified either
// requisite or preferred or both MUST be specified.
//
// If requisite is specified, the provisioned volume MUST be
// accessible from at least one of the requisite topologies.
//
// Given
// x = number of topologies provisioned volume is accessible from
// n = number of requisite topologies
// The CO MUST ensure n >= 1. The SP MUST ensure x >= 1
// If x==n, then the SP MUST make the provisioned volume available to
// all topologies from the list of requisite topologies. If it is
// unable to do so, the SP MUST fail the CreateVolume call.
// For example, if a volume should be accessible from a single zone,
// and requisite =
// {"region": "R1", "zone": "Z2"}
// then the provisioned volume MUST be accessible from the "region"
// "R1" and the "zone" "Z2".
// Similarly, if a volume should be accessible from two zones, and
// requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"}
// then the provisioned volume MUST be accessible from the "region"
// "R1" and both "zone" "Z2" and "zone" "Z3".
//
// If x<n, then the SP SHALL choose x unique topologies from the list
// of requisite topologies. If it is unable to do so, the SP MUST fail
// the CreateVolume call.
// For example, if a volume should be accessible from a single zone,
// and requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"}
// then the SP may choose to make the provisioned volume available in
// either the "zone" "Z2" or the "zone" "Z3" in the "region" "R1".
// Similarly, if a volume should be accessible from two zones, and
// requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"},
// {"region": "R1", "zone": "Z4"}
// then the provisioned volume MUST be accessible from any combination
// of two unique topologies: e.g. "R1/Z2" and "R1/Z3", or "R1/Z2" and
// "R1/Z4", or "R1/Z3" and "R1/Z4".
//
// If x>n, then the SP MUST make the provisioned volume available from
// all topologies from the list of requisite topologies and MAY choose
// the remaining x-n unique topologies from the list of all possible
// topologies. If it is unable to do so, the SP MUST fail the
// CreateVolume call.
// For example, if a volume should be accessible from two zones, and
// requisite =
// {"region": "R1", "zone": "Z2"}
// then the provisioned volume MUST be accessible from the "region"
// "R1" and the "zone" "Z2" and the SP may select the second zone
// independently, e.g. "R1/Z4".
repeated Topology requisite = 1;
// Specifies the list of topologies the CO would prefer the volume to
// be provisioned in.
//
// This field is OPTIONAL. If TopologyRequirement is specified either
// requisite or preferred or both MUST be specified.
//
// An SP MUST attempt to make the provisioned volume available using
// the preferred topologies in order from first to last.
//
// If requisite is specified, all topologies in preferred list MUST
// also be present in the list of requisite topologies.
//
// If the SP is unable to make the provisioned volume available
// from any of the preferred topologies, the SP MAY choose a topology
// from the list of requisite topologies.
// If the list of requisite topologies is not specified, then the SP
// MAY choose from the list of all possible topologies.
// If the list of requisite topologies is specified and the SP is
// unable to make the provisioned volume available from any of the
// requisite topologies it MUST fail the CreateVolume call.
//
// Example 1:
// Given a volume should be accessible from a single zone, and
// requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"}
// preferred =
// {"region": "R1", "zone": "Z3"}
// then the SP SHOULD first attempt to make the provisioned volume
// available from "zone" "Z3" in the "region" "R1" and fall back to
// "zone" "Z2" in the "region" "R1" if that is not possible.
//
// Example 2:
// Given a volume should be accessible from a single zone, and
// requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"},
// {"region": "R1", "zone": "Z4"},
// {"region": "R1", "zone": "Z5"}
// preferred =
// {"region": "R1", "zone": "Z4"},
// {"region": "R1", "zone": "Z2"}
// then the SP SHOULD first attempt to make the provisioned volume
// accessible from "zone" "Z4" in the "region" "R1" and fall back to
// "zone" "Z2" in the "region" "R1" if that is not possible. If that
// is not possible, the SP may choose between either the "zone"
// "Z3" or "Z5" in the "region" "R1".
//
// Example 3:
// Given a volume should be accessible from TWO zones (because an
// opaque parameter in CreateVolumeRequest, for example, specifies
// the volume is accessible from two zones, aka synchronously
// replicated), and
// requisite =
// {"region": "R1", "zone": "Z2"},
// {"region": "R1", "zone": "Z3"},
// {"region": "R1", "zone": "Z4"},
// {"region": "R1", "zone": "Z5"}
// preferred =
// {"region": "R1", "zone": "Z5"},
// {"region": "R1", "zone": "Z3"}
// then the SP SHOULD first attempt to make the provisioned volume
// accessible from the combination of the two "zones" "Z5" and "Z3" in
// the "region" "R1". If that's not possible, it should fall back to
// a combination of "Z5" and other possibilities from the list of
// requisite. If that's not possible, it should fall back to a
// combination of "Z3" and other possibilities from the list of
// requisite. If that's not possible, it should fall back to a
// combination of other possibilities from the list of requisite.
repeated Topology preferred = 2;
}
// Topology is a map of topological domains to topological segments.
// A topological domain is a sub-division of a cluster, like "region",
// "zone", "rack", etc.
// A topological segment is a specific instance of a topological domain,
// like "zone3", "rack3", etc.
// For example {"com.company/zone": "Z1", "com.company/rack": "R3"}
// Valid keys have two segments: an OPTIONAL prefix and name, separated
// by a slash (/), for example: "com.company.example/zone".
// The key name segment is REQUIRED. The prefix is OPTIONAL.
// The key name MUST be 63 characters or less, begin and end with an
// alphanumeric character ([a-z0-9A-Z]), and contain only dashes (-),
// underscores (_), dots (.), or alphanumerics in between, for example
// "zone".
// The key prefix MUST be 63 characters or less, begin and end with a
// lower-case alphanumeric character ([a-z0-9]), contain only
// dashes (-), dots (.), or lower-case alphanumerics in between, and
// follow domain name notation format
// (https://tools.ietf.org/html/rfc1035#section-2.3.1).
// The key prefix SHOULD include the plugin's host company name and/or
// the plugin name, to minimize the possibility of collisions with keys
// from other plugins.
// If a key prefix is specified, it MUST be identical across all
// topology keys returned by the SP (across all RPCs).
// Keys MUST be case-insensitive, meaning the keys "Zone" and "zone"
// MUST NOT both exist.
// Each value (topological segment) MUST contain 1 or more strings.
// Each string MUST be 63 characters or less and begin and end with an
// alphanumeric character with '-', '_', '.', or alphanumerics in
// between.
message Topology {
// Segments maps each topological domain (the key) to its topological
// segment (the value), subject to the key and value rules above.
map<string, string> segments = 1;
}
// VolumeCapability specifies a capability of a volume: the API used to
// access it (block or mount) together with its access mode.
message VolumeCapability {
// Indicate that the volume will be accessed via the block device API.
message BlockVolume {
// Intentionally empty, for now.
}
// Indicate that the volume will be accessed via the filesystem API.
message MountVolume {
// The filesystem type. This field is OPTIONAL.
// An empty string is equal to an unspecified field value.
string fs_type = 1;
// The mount options that can be used for the volume. This field is
// OPTIONAL. `mount_flags` MAY contain sensitive information.
// Therefore, the CO and the Plugin MUST NOT leak this information
// to untrusted entities. The total size of this repeated field
// SHALL NOT exceed 4 KiB.
repeated string mount_flags = 2;
}
// Specify how a volume can be accessed.
message AccessMode {
// Mode enumerates the access modes. UNKNOWN is the zero value, so it is
// what an unset mode field reads as.
enum Mode {
UNKNOWN = 0;
// Can only be published once as read/write on a single node, at
// any given time.
SINGLE_NODE_WRITER = 1;
// Can only be published once as readonly on a single node, at
// any given time.
SINGLE_NODE_READER_ONLY = 2;
// Can be published as readonly at multiple nodes simultaneously.
MULTI_NODE_READER_ONLY = 3;
// Can be published at multiple nodes simultaneously. Only one of
// the nodes can be used as read/write. The rest will be readonly.
MULTI_NODE_SINGLE_WRITER = 4;
// Can be published as read/write at multiple nodes
// simultaneously.
MULTI_NODE_MULTI_WRITER = 5;
}
// This field is REQUIRED.
Mode mode = 1;
}
// Specifies what API the volume will be accessed using. One of the
// following fields MUST be specified.
oneof access_type {
// Block selects access via the block device API.
BlockVolume block = 1;
// Mount selects access via the filesystem API.
MountVolume mount = 2;
}
// This is a REQUIRED field.
AccessMode access_mode = 3;
}