depot/packages/networking/ipfs-cluster/allocator/balanced/balanced.go

// Package balanced implements an allocator that can sort allocations
// based on multiple metrics, where metrics may be an arbitrary way to
// partition a set of peers.
//
// For example, when allocating by ["tag:region", "disk"], the resulting peer
// candidate order will be balanced between regions and, within each region,
// ordered by the weight of the disk metric.
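//
// As an illustrative sketch (peer names and metric values here are
// hypothetical, and it is assumed that the region tag metrics are
// partitionable with equal weight while disk metrics are not), allocating
// by ["tag:region", "disk"] over:
//
//	peerA: region=us, disk weight=100
//	peerB: region=us, disk weight=50
//	peerC: region=eu, disk weight=80
//
// would yield the order [peerA, peerC, peerB]: regions are visited in turn
// ("us" first because its aggregated disk weight is larger), and within each
// region peers are ordered by disk weight.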
package balanced
import (
"context"
"fmt"
"sort"
api "github.com/ipfs-cluster/ipfs-cluster/api"
logging "github.com/ipfs/go-log/v2"
peer "github.com/libp2p/go-libp2p/core/peer"
rpc "github.com/libp2p/go-libp2p-gorpc"
)
var logger = logging.Logger("allocator")
// Allocator is an allocator that partitions metrics and orders the
// final list of allocations by selecting a peer from each partition in turn.
type Allocator struct {
config *Config
rpcClient *rpc.Client
}
// New returns an initialized Allocator.
func New(cfg *Config) (*Allocator, error) {
err := cfg.Validate()
if err != nil {
return nil, err
}
return &Allocator{
config: cfg,
}, nil
}
// SetClient provides us with an rpc.Client which allows
// contacting other components in the cluster.
func (a *Allocator) SetClient(c *rpc.Client) {
a.rpcClient = c
}
// Shutdown is called on cluster shutdown. We just invalidate
// any metrics from this point.
func (a *Allocator) Shutdown(ctx context.Context) error {
a.rpcClient = nil
return nil
}
type partitionedMetric struct {
metricName string
curChoosingIndex int
noMore bool
partitions []*partition // kept sorted by weight (descending); the value string only breaks ties
}
type partition struct {
value string
weight int64
aggregatedWeight int64
peers map[peer.ID]bool // the bool tracks whether the peer has been picked already out of the partition when doing the final sort.
sub *partitionedMetric // all peers in sub-partitions will have the same value for this metric
}
// Returns a partitionedMetric which has partitions and subpartitions based
// on the metrics and values given by the "by" slice. The partitions
// are ordered based on the cumulative weight.
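//
// As a rough sketch (hypothetical peers and values), partitioning
// ["tag:region", "disk"] metrics for peers A and B in region "us" and peer C
// in region "eu" yields a tree like:
//
//	tag:region
//	  us -> disk: {A}, {B}
//	  eu -> disk: {C}
//
// where each leaf holds the disk partitions of the peers carrying the parent
// region value.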
func partitionMetrics(set api.MetricsSet, by []string) *partitionedMetric {
rootMetric := by[0]
pnedMetric := &partitionedMetric{
metricName: rootMetric,
partitions: partitionValues(set[rootMetric]),
}
// For sorting based on weight (more to less)
lessF := func(i, j int) bool {
wi := pnedMetric.partitions[i].weight
wj := pnedMetric.partitions[j].weight
// if weight is equal, sort by aggregated weight of
// all sub-partitions.
if wi == wj {
awi := pnedMetric.partitions[i].aggregatedWeight
awj := pnedMetric.partitions[j].aggregatedWeight
// If subpartitions weigh the same, fall back to a strict
// order based on the value string.
if awi == awj {
return pnedMetric.partitions[i].value < pnedMetric.partitions[j].value
}
return awj < awi
}
// Descending!
return wj < wi
}
if len(by) == 1 { // we are done
sort.Slice(pnedMetric.partitions, lessF)
return pnedMetric
}
// process sub-partitions
for _, partition := range pnedMetric.partitions {
filteredSet := make(api.MetricsSet)
for k, v := range set {
if k == rootMetric { // not needed anymore
continue
}
for _, m := range v {
// only leave metrics for peers in current partition
if _, ok := partition.peers[m.Peer]; ok {
filteredSet[k] = append(filteredSet[k], m)
}
}
}
partition.sub = partitionMetrics(filteredSet, by[1:])
// Add the aggregated weight of the subpartitions
for _, subp := range partition.sub.partitions {
partition.aggregatedWeight += subp.aggregatedWeight
}
}
sort.Slice(pnedMetric.partitions, lessF)
return pnedMetric
}
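// partitionValues groups the given metrics into partitions: each
// non-partitionable metric gets its own single-peer partition, while
// partitionable metrics sharing the same value are grouped together and
// their weights summed.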
func partitionValues(metrics []api.Metric) []*partition {
partitions := []*partition{}
if len(metrics) == 0 {
return partitions
}
// We group peers with the same value in the same partition.
partitionsByValue := make(map[string]*partition)
for _, m := range metrics {
// Sometimes two metrics have the same value / weight, but we
// still want to put them in different partitions. Otherwise
// their weights get added, they form a single bucket, and
// therefore they are not selected in order: 3 peers with
// freespace=100 and one peer with freespace=200 would result
// in one of the peers with freespace 100 being chosen first
// because that partition's weight is 300.
//
// We are going to call these metrics (like free-space),
// non-partitionable metrics. This is going to be the default
// (for backwards compat reasons).
//
// The informers must set the Partitionable field accordingly
// when two metrics with the same value must be grouped in the
// same partition.
//
// Note: aggregatedWeight is the same as weight here (sum of
// weight of all metrics in partitions), and gets updated
// later in partitionMetrics with the aggregated weight of
// sub-partitions.
if !m.Partitionable {
partitions = append(partitions, &partition{
value: m.Value,
weight: m.GetWeight(),
aggregatedWeight: m.GetWeight(),
peers: map[peer.ID]bool{
m.Peer: false,
},
})
continue
}
// In any other case, we partition by value.
if p, ok := partitionsByValue[m.Value]; ok {
p.peers[m.Peer] = false
p.weight += m.GetWeight()
p.aggregatedWeight += m.GetWeight()
} else {
partitionsByValue[m.Value] = &partition{
value: m.Value,
weight: m.GetWeight(),
aggregatedWeight: m.GetWeight(),
peers: map[peer.ID]bool{
m.Peer: false,
},
}
}
}
for _, p := range partitionsByValue {
partitions = append(partitions, p)
}
return partitions
}
// Returns a list of peers ordered by cycling through the partitions: it
// never chooses twice from the same partition while some other partition
// still has peers left to pick.
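// For example (hypothetical peers), with partitions {A, B} and {C} the
// resulting order is [A, C, B] rather than [A, B, C].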
func (pnedm *partitionedMetric) sortedPeers() []peer.ID {
peers := []peer.ID{}
for {
peer := pnedm.chooseNext()
if peer == "" { // This means we are done.
break
}
peers = append(peers, peer)
}
return peers
}
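// chooseNext returns the next peer to allocate: it takes it from the
// partition at curChoosingIndex (recursing into sub-partitions when present)
// and then advances the index in round-robin fashion. Once a full pass over
// all partitions yields no unused peer, noMore is set and "" is returned.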
func (pnedm *partitionedMetric) chooseNext() peer.ID {
lenp := len(pnedm.partitions)
if lenp == 0 {
return ""
}
if pnedm.noMore {
return ""
}
var peer peer.ID
curPartition := pnedm.partitions[pnedm.curChoosingIndex]
done := 0
for {
if curPartition.sub != nil {
// Choose something from the sub-partitionedMetric
peer = curPartition.sub.chooseNext()
} else {
// We are a bottom-partition. Choose one of our peers
for pid, used := range curPartition.peers {
if !used {
peer = pid
curPartition.peers[pid] = true // mark as used
break
}
}
}
// look in next partition next time
pnedm.curChoosingIndex = (pnedm.curChoosingIndex + 1) % lenp
curPartition = pnedm.partitions[pnedm.curChoosingIndex]
done++
if peer != "" {
break
}
// no peer and we have looked in as many partitions as we have
if done == lenp {
pnedm.noMore = true
break
}
}
return peer
}
// Allocate produces a sorted list of cluster peer IDs based on different
// metrics provided for those peer IDs.
// It works as follows:
//
// - First, it buckets each peer's metrics based on the AllocateBy list. The
// metric name must match the bucket name, otherwise they are put at the end.
// - Second, based on the AllocateBy order, it orders the first bucket and
// groups peers by ordered value.
// - Third, it selects metrics on the second bucket for the highest-priority
// peers of the first bucket and orders their metrics. Then for the peers in
// second position, etc.
// - It repeats the process until there are no more buckets to sort.
// - Finally, it returns the peers by cycling through the ordered partitions,
// never taking two consecutive peers from the same partition while other
// partitions still have candidates, with peers from the priority set placed
// before the remaining candidates.
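//
// As an illustration (hypothetical configuration): with
// AllocateBy = ["tag:region", "freespace"], peers carrying priority metrics
// come first, followed by the remaining candidates, each group interleaved
// across regions and ordered by free space within a region.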
func (a *Allocator) Allocate(
ctx context.Context,
c api.Cid,
current, candidates, priority api.MetricsSet,
) ([]peer.ID, error) {
// For the allocation to work well, there have to be metrics of all
// the types for all the peers. There cannot be a metric of one type
// for a peer that does not appear in the other types.
//
// Removing such occurrences is done in allocate.go, before the
// allocator is called.
//
// Otherwise, the sorting might produce odd results.
candidatePartition := partitionMetrics(candidates, a.config.AllocateBy)
priorityPartition := partitionMetrics(priority, a.config.AllocateBy)
logger.Debugf("Balanced allocator partitions:\n%s\n", printPartition(candidatePartition, 0))
//fmt.Println(printPartition(candidatePartition, 0))
first := priorityPartition.sortedPeers()
last := candidatePartition.sortedPeers()
return append(first, last...), nil
}
// Metrics returns the names of the metrics that have been registered
// with this allocator.
func (a *Allocator) Metrics() []string {
return a.config.AllocateBy
}
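// printPartition returns an indented, human-readable dump of a
// partitionedMetric tree (partition values, weights and peers), used for
// debug logging.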
func printPartition(m *partitionedMetric, ind int) string {
str := ""
indent := func() {
for i := 0; i < ind+2; i++ {
str += " "
}
}
for _, p := range m.partitions {
indent()
str += fmt.Sprintf(" | %s:%s - %d - [", m.metricName, p.value, p.weight)
for pid, used := range p.peers {
str += fmt.Sprintf("%s|%t, ", pid, used)
}
str += "]\n"
if p.sub != nil {
str += printPartition(p.sub, ind+2)
}
}
return str
}