ipfs/ipfs-cluster
allocator/balanced/balanced.go
// Package balanced implements an allocator that can sort allocations
// based on multiple metrics, where metrics may be an arbitrary way to
// partition a set of peers.
//
// For example, when allocating by ["tag:region", "disk"], the resulting
// peer candidate order will be balanced between regions and, within each
// region, ordered by the weight of the disk metric.
package balanced

import (
    "context"
    "fmt"
    "sort"

    api "github.com/ipfs-cluster/ipfs-cluster/api"
    logging "github.com/ipfs/go-log/v2"
    peer "github.com/libp2p/go-libp2p/core/peer"
    rpc "github.com/libp2p/go-libp2p-gorpc"
)

var logger = logging.Logger("allocator")
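
// exampleNewBalancedAllocator is an illustrative sketch and not part of the
// upstream file: it shows how a caller might construct this allocator so
// that candidates are spread across regions first and, within each region,
// ordered by the weight of a free-space metric. Only the AllocateBy field
// of Config is referenced elsewhere in this file; the metric names used
// here are assumptions.
func exampleNewBalancedAllocator() (*Allocator, error) {
    // New validates the configuration before returning the Allocator.
    return New(&Config{
        AllocateBy: []string{"tag:region", "freespace"},
    })
}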

// Allocator partitions metrics and produces the final list of allocations
// by selecting peers from each partition in turn.
type Allocator struct {
    config    *Config
    rpcClient *rpc.Client
}

// New returns an initialized Allocator.
func New(cfg *Config) (*Allocator, error) {
    err := cfg.Validate()
    if err != nil {
        return nil, err
    }

    return &Allocator{
        config: cfg,
    }, nil
}

// SetClient provides us with an rpc.Client which allows
// contacting other components in the cluster.
func (a *Allocator) SetClient(c *rpc.Client) {
    a.rpcClient = c
}

// Shutdown is called on cluster shutdown. We just invalidate
// any metrics from this point.
func (a *Allocator) Shutdown(ctx context.Context) error {
    a.rpcClient = nil
    return nil
}

type partitionedMetric struct {
    metricName       string
    curChoosingIndex int
    noMore           bool
    partitions       []*partition // kept sorted by weight, then value (see partitionMetrics)
}

type partition struct {
    value            string
    weight           int64
    aggregatedWeight int64
    peers            map[peer.ID]bool   // the bool tracks whether the peer has been picked already out of the partition when doing the final sort.
    sub              *partitionedMetric // all peers in sub-partitions will have the same value for this metric
}
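
// exampleHandBuiltTree is an illustrative sketch and not part of the
// upstream file: it hand-builds the small tree that partitionMetrics would
// produce for two peers in region "us-east" when allocating by
// ["tag:region", "freespace"], to make the nesting of partitions concrete.
// The concrete weights, and the assumption that the tag metric carries no
// weight of its own, are illustrative only.
func exampleHandBuiltTree(p1, p2 peer.ID) *partitionedMetric {
    return &partitionedMetric{
        metricName: "tag:region",
        partitions: []*partition{
            {
                value:            "us-east",
                weight:           0,   // the tag metric itself weighs nothing here
                aggregatedWeight: 300, // own weight plus the sub-partitions' aggregated weights
                peers:            map[peer.ID]bool{p1: false, p2: false},
                sub: &partitionedMetric{
                    metricName: "freespace",
                    partitions: []*partition{
                        {value: "200", weight: 200, aggregatedWeight: 200, peers: map[peer.ID]bool{p1: false}},
                        {value: "100", weight: 100, aggregatedWeight: 100, peers: map[peer.ID]bool{p2: false}},
                    },
                },
            },
        },
    }
}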

// partitionMetrics returns a partitionedMetric with partitions and
// sub-partitions built from the metrics and values named by the "by"
// slice. The partitions are ordered by cumulative weight.
func partitionMetrics(set api.MetricsSet, by []string) *partitionedMetric {
    rootMetric := by[0]
    pnedMetric := &partitionedMetric{
        metricName: rootMetric,
        partitions: partitionValues(set[rootMetric]),
    }

    // For sorting based on weight (more to less)
    lessF := func(i, j int) bool {
        wi := pnedMetric.partitions[i].weight
        wj := pnedMetric.partitions[j].weight

        // if weight is equal, sort by aggregated weight of
        // all sub-partitions.
        if wi == wj {
            awi := pnedMetric.partitions[i].aggregatedWeight
            awj := pnedMetric.partitions[j].aggregatedWeight
            // If sub-partitions weigh the same, fall back to a
            // strict ordering by value string.
            if awi == awj {
                return pnedMetric.partitions[i].value < pnedMetric.partitions[j].value
            }
            return awj < awi

        }
        // Descending!
        return wj < wi
    }

    if len(by) == 1 { // we are done
        sort.Slice(pnedMetric.partitions, lessF)
        return pnedMetric
    }

    // process sub-partitions
    for _, partition := range pnedMetric.partitions {
        filteredSet := make(api.MetricsSet)
        for k, v := range set {
            if k == rootMetric { // not needed anymore
                continue
            }
            for _, m := range v {
                // only leave metrics for peers in current partition
                if _, ok := partition.peers[m.Peer]; ok {
                    filteredSet[k] = append(filteredSet[k], m)
                }
            }
        }

        partition.sub = partitionMetrics(filteredSet, by[1:])

        // Add the aggregated weight of the subpartitions
        for _, subp := range partition.sub.partitions {
            partition.aggregatedWeight += subp.aggregatedWeight
        }
    }
    sort.Slice(pnedMetric.partitions, lessF)
    return pnedMetric
}
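
// examplePartitionAndSort is an illustrative sketch and not part of the
// upstream file: given a metrics set keyed by metric name, it partitions
// peers hierarchically by region tag and then by free space, and returns
// the resulting peer order. With two regions present, consecutive peers in
// the result alternate between regions whenever possible; within a region,
// peers appear by descending metric weight. The metric names are
// assumptions.
func examplePartitionAndSort(set api.MetricsSet) []peer.ID {
    pm := partitionMetrics(set, []string{"tag:region", "freespace"})
    return pm.sortedPeers()
}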

func partitionValues(metrics []api.Metric) []*partition {
    partitions := []*partition{}

    if len(metrics) == 0 {
        return partitions
    }

    // We group peers with the same value in the same partition.
    partitionsByValue := make(map[string]*partition)

    for _, m := range metrics {
        // Sometimes two metrics have the same value / weight, but we
        // still want to put them in different partitions. Otherwise
        // their weights get added, they form a single bucket, and they
        // are no longer selected in order: 3 peers with freespace=100
        // and one peer with freespace=200 would result in one of the
        // peers with freespace=100 being chosen first because that
        // partition's weight is 300.
        //
        // We call these metrics (like free-space) non-partitionable.
        // This is the default (for backwards-compatibility reasons).
        //
        // The informers must set the Partitionable field accordingly
        // when two metrics with the same value must be grouped in the
        // same partition.
        //
        // Note: aggregatedWeight is the same as weight here (sum of
        // weight of all metrics in partitions), and gets updated
        // later in partitionMetrics with the aggregated weight of
        // sub-partitions.
        if !m.Partitionable {
            partitions = append(partitions, &partition{
                value:            m.Value,
                weight:           m.GetWeight(),
                aggregatedWeight: m.GetWeight(),
                peers: map[peer.ID]bool{
                    m.Peer: false,
                },
            })
            continue
        }

        // Any other case, we partition by value.
        if p, ok := partitionsByValue[m.Value]; ok {
            p.peers[m.Peer] = false
            p.weight += m.GetWeight()
            p.aggregatedWeight += m.GetWeight()
        } else {
            partitionsByValue[m.Value] = &partition{
                value:            m.Value,
                weight:           m.GetWeight(),
                aggregatedWeight: m.GetWeight(),
                peers: map[peer.ID]bool{
                    m.Peer: false,
                },
            }
        }

    }
    for _, p := range partitionsByValue {
        partitions = append(partitions, p)
    }
    return partitions
}
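
// exampleNonPartitionable is an illustrative sketch and not part of the
// upstream file: it mirrors the free-space example in the comment above.
// When the metrics for three peers with freespace=100 and one peer with
// freespace=200 are all marked non-partitionable, partitionValues returns
// four partitions of one peer each, so the peer with 200 is strictly
// ordered first instead of being outweighed by a combined 300-weight
// bucket.
func exampleNonPartitionable(freespaceMetrics []api.Metric) []*partition {
    // Each non-partitionable metric produces its own single-peer partition,
    // regardless of equal values.
    return partitionValues(freespaceMetrics)
}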

// sortedPeers returns the list of peers in selection order: it never
// chooses twice in a row from the same partition while another partition
// still has unused peers.
func (pnedm *partitionedMetric) sortedPeers() []peer.ID {
    peers := []peer.ID{}
    for {
        peer := pnedm.chooseNext()
        if peer == "" { // This means we are done.
            break
        }
        peers = append(peers, peer)
    }
    return peers
}

func (pnedm *partitionedMetric) chooseNext() peer.ID {
    lenp := len(pnedm.partitions)
    if lenp == 0 {
        return ""
    }

    if pnedm.noMore {
        return ""
    }

    var peer peer.ID

    curPartition := pnedm.partitions[pnedm.curChoosingIndex]
    done := 0
    for {
        if curPartition.sub != nil {
            // Choose something from the sub-partitionedMetric
            peer = curPartition.sub.chooseNext()
        } else {
            // We are a bottom-partition. Choose one of our peers
            for pid, used := range curPartition.peers {
                if !used {
                    peer = pid
                    curPartition.peers[pid] = true // mark as used
                    break
                }
            }
        }
        // look in next partition next time
        pnedm.curChoosingIndex = (pnedm.curChoosingIndex + 1) % lenp
        curPartition = pnedm.partitions[pnedm.curChoosingIndex]
        done++

        if peer != "" {
            break
        }

        // no peer and we have looked in as many partitions as we have
        if done == lenp {
            pnedm.noMore = true
            break
        }
    }

    return peer
}
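
// exampleChooseFirstN is an illustrative sketch and not part of the
// upstream file: it picks at most n peers by calling chooseNext directly,
// which is useful when only a replication factor's worth of allocations is
// needed. It stops early once chooseNext reports that no unused peers
// remain (the empty peer ID).
func exampleChooseFirstN(pm *partitionedMetric, n int) []peer.ID {
    chosen := make([]peer.ID, 0, n)
    for i := 0; i < n; i++ {
        next := pm.chooseNext()
        if next == "" { // all partitions exhausted
            break
        }
        chosen = append(chosen, next)
    }
    return chosen
}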

// Allocate produces a sorted list of cluster peer IDs based on different
// metrics provided for those peer IDs.
// It works as follows:
//
//   - First, it buckets each peer's metrics based on the AllocateBy list.
//     The metric name must match a bucket name; otherwise the metric is
//     put at the end.
//   - Second, based on the AllocateBy order, it orders the first bucket and
//     groups peers by ordered value.
//   - Third, it takes the metrics of the second bucket for the highest
//     priority peers of the first bucket and orders them. Then it does the
//     same for the peers in second position, and so on.
//   - It repeats the process until there are no more buckets to sort.
//   - Finally, it walks the resulting partitions to produce the sorted peer
//     list, never choosing twice in a row from the same partition while
//     another partition still has unused peers. Peers from the priority
//     metrics set always come before the remaining candidates.
func (a *Allocator) Allocate(
    ctx context.Context,
    c api.Cid,
    current, candidates, priority api.MetricsSet,
) ([]peer.ID, error) {

    // For the allocation to work well, there have to be metrics of all
    // the types for all the peers. There cannot be a metric of one type
    // for a peer that does not appear in the other types.
    //
    // Removing such occurrences is done in allocate.go, before the
    // allocator is called.
    //
    // Otherwise, the sorting might be funny.

    candidatePartition := partitionMetrics(candidates, a.config.AllocateBy)
    priorityPartition := partitionMetrics(priority, a.config.AllocateBy)

    logger.Debugf("Balanced allocator partitions:\n%s\n", printPartition(candidatePartition, 0))

    first := priorityPartition.sortedPeers()
    last := candidatePartition.sortedPeers()

    return append(first, last...), nil
}
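
// exampleAllocate is an illustrative sketch and not part of the upstream
// file: it shows the shape of a call to Allocate. Every metric named in
// AllocateBy should be present for every peer mentioned in the candidate
// and priority sets; peers from the priority set always come first in the
// returned order.
func exampleAllocate(ctx context.Context, a *Allocator, c api.Cid, candidates, priority api.MetricsSet) ([]peer.ID, error) {
    // The "current" metrics (peers already holding the CID) are not used
    // for ordering in this allocator, so an empty set is passed here.
    return a.Allocate(ctx, c, api.MetricsSet{}, candidates, priority)
}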

// Metrics returns the names of the metrics that have been registered
// with this allocator.
func (a *Allocator) Metrics() []string {
    return a.config.AllocateBy
}

func printPartition(m *partitionedMetric, ind int) string {
    str := ""
    indent := func() {
        for i := 0; i < ind+2; i++ {
            str += " "
        }
    }

    for _, p := range m.partitions {
        indent()
        str += fmt.Sprintf(" | %s:%s - %d - [", m.metricName, p.value, p.weight)
        for pid, used := range p.peers {
            str += fmt.Sprintf("%s|%t, ", pid, used)
        }
        str += "]\n"
        if p.sub != nil {
            str += printPartition(p.sub, ind+2)
        }
    }
    return str
}