2246 lines
54 KiB
Go
2246 lines
54 KiB
Go
|
package ipfscluster
|
||
|
|
||
|
import (
|
||
|
"context"
|
||
|
"errors"
|
||
|
"flag"
|
||
|
"fmt"
|
||
|
"math/rand"
|
||
|
"mime/multipart"
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
"sort"
|
||
|
"strings"
|
||
|
"sync"
|
||
|
"testing"
|
||
|
"time"
|
||
|
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/allocator/balanced"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/api"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/api/rest"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/consensus/crdt"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/consensus/raft"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/datastore/badger"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/datastore/inmem"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/datastore/leveldb"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/informer/disk"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/ipfsconn/ipfshttp"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/monitor/pubsubmon"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/observations"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/pintracker/stateless"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/state"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/test"
|
||
|
"github.com/ipfs-cluster/ipfs-cluster/version"
|
||
|
|
||
|
ds "github.com/ipfs/go-datastore"
|
||
|
libp2p "github.com/libp2p/go-libp2p"
|
||
|
crypto "github.com/libp2p/go-libp2p-core/crypto"
|
||
|
host "github.com/libp2p/go-libp2p-core/host"
|
||
|
peer "github.com/libp2p/go-libp2p-core/peer"
|
||
|
peerstore "github.com/libp2p/go-libp2p-core/peerstore"
|
||
|
dht "github.com/libp2p/go-libp2p-kad-dht"
|
||
|
dual "github.com/libp2p/go-libp2p-kad-dht/dual"
|
||
|
pubsub "github.com/libp2p/go-libp2p-pubsub"
|
||
|
routedhost "github.com/libp2p/go-libp2p/p2p/host/routed"
|
||
|
ma "github.com/multiformats/go-multiaddr"
|
||
|
)
|
||
|
|
||
|
var (
|
||
|
// number of clusters to create
|
||
|
nClusters = 5
|
||
|
|
||
|
// number of pins to pin/unpin/check
|
||
|
nPins = 100
|
||
|
|
||
|
logLevel = "FATAL"
|
||
|
customLogLvlFacilities = logFacilities{}
|
||
|
|
||
|
consensus = "crdt"
|
||
|
datastore = "badger"
|
||
|
|
||
|
ttlDelayTime = 2 * time.Second // set on Main to diskInf.MetricTTL
|
||
|
testsFolder = "clusterTestsFolder"
|
||
|
|
||
|
// When testing with fixed ports...
|
||
|
// clusterPort = 10000
|
||
|
// apiPort = 10100
|
||
|
// ipfsProxyPort = 10200
|
||
|
)
|
||
|
|
||
|
// logFacilities is a list of logging facility names. It implements
// flag.Value so that it can be filled from a comma-separated flag.
type logFacilities []string

// String is the method to format the flag's value, part of the flag.Value
// interface. The flag package may call it on a zero-valued (nil) receiver,
// in which case an empty string is returned.
func (lg *logFacilities) String() string {
	if lg == nil {
		return ""
	}
	return fmt.Sprint(*lg)
}

// Set is the method to set the flag value, part of the flag.Value interface.
// The value is split on commas; surrounding whitespace is trimmed and empty
// elements are dropped, since they can never name a facility. It returns an
// error if facilities have already been set.
func (lg *logFacilities) Set(value string) error {
	if len(*lg) > 0 {
		return errors.New("logFacilities flag already set")
	}
	for _, lf := range strings.Split(value, ",") {
		lf = strings.TrimSpace(lf)
		if lf == "" {
			continue
		}
		*lg = append(*lg, lf)
	}
	return nil
}
|
||
|
|
||
|
// TestMain runs test initialization. Since Go1.13 we cannot run this on init()
|
||
|
// as flag.Parse() does not work well there
|
||
|
// (see https://golang.org/src/testing/testing.go#L211)
|
||
|
func TestMain(m *testing.M) {
|
||
|
rand.Seed(time.Now().UnixNano())
|
||
|
ReadyTimeout = 11 * time.Second
|
||
|
|
||
|
// GossipSub needs to heartbeat to discover newly connected hosts
|
||
|
// This speeds things up a little.
|
||
|
pubsub.GossipSubHeartbeatInterval = 50 * time.Millisecond
|
||
|
|
||
|
flag.Var(&customLogLvlFacilities, "logfacs", "use -logLevel for only the following log facilities; comma-separated")
|
||
|
flag.StringVar(&logLevel, "loglevel", logLevel, "default log level for tests")
|
||
|
flag.IntVar(&nClusters, "nclusters", nClusters, "number of clusters to use")
|
||
|
flag.IntVar(&nPins, "npins", nPins, "number of pins to pin/unpin/check")
|
||
|
flag.StringVar(&consensus, "consensus", consensus, "consensus implementation")
|
||
|
flag.StringVar(&datastore, "datastore", datastore, "datastore backend")
|
||
|
flag.Parse()
|
||
|
|
||
|
if len(customLogLvlFacilities) <= 0 {
|
||
|
for f := range LoggingFacilities {
|
||
|
SetFacilityLogLevel(f, logLevel)
|
||
|
}
|
||
|
|
||
|
for f := range LoggingFacilitiesExtra {
|
||
|
SetFacilityLogLevel(f, logLevel)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for _, f := range customLogLvlFacilities {
|
||
|
if _, ok := LoggingFacilities[f]; ok {
|
||
|
SetFacilityLogLevel(f, logLevel)
|
||
|
continue
|
||
|
}
|
||
|
if _, ok := LoggingFacilitiesExtra[f]; ok {
|
||
|
SetFacilityLogLevel(f, logLevel)
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
diskInfCfg := &disk.Config{}
|
||
|
diskInfCfg.LoadJSON(testingDiskInfCfg)
|
||
|
ttlDelayTime = diskInfCfg.MetricTTL * 2
|
||
|
|
||
|
os.Exit(m.Run())
|
||
|
}
|
||
|
|
||
|
// randomBytes returns a 64-byte slice filled from the math/rand default
// source, one rand.Int() call per byte.
func randomBytes() []byte {
	const size = 64
	buf := make([]byte, size)
	for i := range buf {
		buf[i] = byte(rand.Int())
	}
	return buf
}
|
||
|
|
||
|
func createComponents(
|
||
|
t *testing.T,
|
||
|
host host.Host,
|
||
|
pubsub *pubsub.PubSub,
|
||
|
dht *dual.DHT,
|
||
|
i int,
|
||
|
staging bool,
|
||
|
) (
|
||
|
*Config,
|
||
|
ds.Datastore,
|
||
|
Consensus,
|
||
|
[]API,
|
||
|
IPFSConnector,
|
||
|
PinTracker,
|
||
|
PeerMonitor,
|
||
|
PinAllocator,
|
||
|
Informer,
|
||
|
Tracer,
|
||
|
*test.IpfsMock,
|
||
|
) {
|
||
|
ctx := context.Background()
|
||
|
mock := test.NewIpfsMock(t)
|
||
|
|
||
|
//apiAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", apiPort+i))
|
||
|
// Bind on port 0
|
||
|
apiAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
|
||
|
// Bind on Port 0
|
||
|
// proxyAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/127.0.0.1/tcp/%d", ipfsProxyPort+i))
|
||
|
proxyAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
|
||
|
nodeAddr, _ := ma.NewMultiaddr(fmt.Sprintf("/ip4/%s/tcp/%d", mock.Addr, mock.Port))
|
||
|
|
||
|
peername := fmt.Sprintf("peer_%d", i)
|
||
|
|
||
|
ident, clusterCfg, apiCfg, ipfsproxyCfg, ipfshttpCfg, badgerCfg, levelDBCfg, raftCfg, crdtCfg, statelesstrackerCfg, psmonCfg, allocBalancedCfg, diskInfCfg, tracingCfg := testingConfigs()
|
||
|
|
||
|
ident.ID = host.ID()
|
||
|
ident.PrivateKey = host.Peerstore().PrivKey(host.ID())
|
||
|
clusterCfg.Peername = peername
|
||
|
clusterCfg.LeaveOnShutdown = false
|
||
|
clusterCfg.SetBaseDir(filepath.Join(testsFolder, host.ID().Pretty()))
|
||
|
|
||
|
apiCfg.HTTPListenAddr = []ma.Multiaddr{apiAddr}
|
||
|
|
||
|
ipfsproxyCfg.ListenAddr = []ma.Multiaddr{proxyAddr}
|
||
|
ipfsproxyCfg.NodeAddr = nodeAddr
|
||
|
|
||
|
ipfshttpCfg.NodeAddr = nodeAddr
|
||
|
|
||
|
raftCfg.DataFolder = filepath.Join(testsFolder, host.ID().Pretty())
|
||
|
|
||
|
badgerCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "badger")
|
||
|
levelDBCfg.Folder = filepath.Join(testsFolder, host.ID().Pretty(), "leveldb")
|
||
|
|
||
|
api, err := rest.NewAPI(ctx, apiCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
ipfsProxy, err := rest.NewAPI(ctx, apiCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
ipfs, err := ipfshttp.NewConnector(ipfshttpCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
alloc, err := balanced.New(allocBalancedCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
inf, err := disk.NewInformer(diskInfCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
store := makeStore(t, badgerCfg, levelDBCfg)
|
||
|
cons := makeConsensus(t, store, host, pubsub, dht, raftCfg, staging, crdtCfg)
|
||
|
tracker := stateless.New(statelesstrackerCfg, ident.ID, clusterCfg.Peername, cons.State)
|
||
|
|
||
|
var peersF func(context.Context) ([]peer.ID, error)
|
||
|
if consensus == "raft" {
|
||
|
peersF = cons.Peers
|
||
|
}
|
||
|
mon, err := pubsubmon.New(ctx, psmonCfg, pubsub, peersF)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
tracingCfg.ServiceName = peername
|
||
|
tracer, err := observations.SetupTracing(tracingCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
return clusterCfg, store, cons, []API{api, ipfsProxy}, ipfs, tracker, mon, alloc, inf, tracer, mock
|
||
|
}
|
||
|
|
||
|
func makeStore(t *testing.T, badgerCfg *badger.Config, levelDBCfg *leveldb.Config) ds.Datastore {
|
||
|
switch consensus {
|
||
|
case "crdt":
|
||
|
if datastore == "badger" {
|
||
|
dstr, err := badger.New(badgerCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
return dstr
|
||
|
}
|
||
|
dstr, err := leveldb.New(levelDBCfg)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
return dstr
|
||
|
default:
|
||
|
return inmem.New()
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func makeConsensus(t *testing.T, store ds.Datastore, h host.Host, psub *pubsub.PubSub, dht *dual.DHT, raftCfg *raft.Config, staging bool, crdtCfg *crdt.Config) Consensus {
|
||
|
switch consensus {
|
||
|
case "raft":
|
||
|
raftCon, err := raft.NewConsensus(h, raftCfg, store, staging)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
return raftCon
|
||
|
case "crdt":
|
||
|
crdtCon, err := crdt.New(h, dht, psub, crdtCfg, store)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
return crdtCon
|
||
|
default:
|
||
|
panic("bad consensus")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func createCluster(t *testing.T, host host.Host, dht *dual.DHT, clusterCfg *Config, store ds.Datastore, consensus Consensus, apis []API, ipfs IPFSConnector, tracker PinTracker, mon PeerMonitor, alloc PinAllocator, inf Informer, tracer Tracer) *Cluster {
|
||
|
cl, err := NewCluster(context.Background(), host, dht, clusterCfg, store, consensus, apis, ipfs, tracker, mon, alloc, []Informer{inf}, tracer)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
return cl
|
||
|
}
|
||
|
|
||
|
func createOnePeerCluster(t *testing.T, nth int, clusterSecret []byte) (*Cluster, *test.IpfsMock) {
|
||
|
hosts, pubsubs, dhts := createHosts(t, clusterSecret, 1)
|
||
|
clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer, mock := createComponents(t, hosts[0], pubsubs[0], dhts[0], nth, false)
|
||
|
cl := createCluster(t, hosts[0], dhts[0], clusterCfg, store, consensus, api, ipfs, tracker, mon, alloc, inf, tracer)
|
||
|
<-cl.Ready()
|
||
|
return cl, mock
|
||
|
}
|
||
|
|
||
|
func createHosts(t *testing.T, clusterSecret []byte, nClusters int) ([]host.Host, []*pubsub.PubSub, []*dual.DHT) {
|
||
|
hosts := make([]host.Host, nClusters)
|
||
|
pubsubs := make([]*pubsub.PubSub, nClusters)
|
||
|
dhts := make([]*dual.DHT, nClusters)
|
||
|
|
||
|
tcpaddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/tcp/0")
|
||
|
quicAddr, _ := ma.NewMultiaddr("/ip4/127.0.0.1/udp/0/quic")
|
||
|
for i := range hosts {
|
||
|
priv, _, err := crypto.GenerateKeyPair(crypto.RSA, 2048)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
h, p, d := createHost(t, priv, clusterSecret, []ma.Multiaddr{quicAddr, tcpaddr})
|
||
|
hosts[i] = h
|
||
|
dhts[i] = d
|
||
|
pubsubs[i] = p
|
||
|
}
|
||
|
|
||
|
return hosts, pubsubs, dhts
|
||
|
}
|
||
|
|
||
|
func createHost(t *testing.T, priv crypto.PrivKey, clusterSecret []byte, listen []ma.Multiaddr) (host.Host, *pubsub.PubSub, *dual.DHT) {
|
||
|
ctx := context.Background()
|
||
|
|
||
|
h, err := newHost(ctx, clusterSecret, priv, libp2p.ListenAddrs(listen...))
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// DHT needs to be created BEFORE connecting the peers
|
||
|
d, err := newTestDHT(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// Pubsub needs to be created BEFORE connecting the peers,
|
||
|
// otherwise they are not picked up.
|
||
|
psub, err := newPubSub(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
return routedhost.Wrap(h, d), psub, d
|
||
|
}
|
||
|
|
||
|
func newTestDHT(ctx context.Context, h host.Host) (*dual.DHT, error) {
|
||
|
return newDHT(ctx, h, nil,
|
||
|
dual.DHTOption(dht.RoutingTableRefreshPeriod(600*time.Millisecond)),
|
||
|
dual.DHTOption(dht.RoutingTableRefreshQueryTimeout(300*time.Millisecond)),
|
||
|
)
|
||
|
}
|
||
|
|
||
|
func createClusters(t *testing.T) ([]*Cluster, []*test.IpfsMock) {
|
||
|
ctx := context.Background()
|
||
|
os.RemoveAll(testsFolder)
|
||
|
cfgs := make([]*Config, nClusters)
|
||
|
stores := make([]ds.Datastore, nClusters)
|
||
|
cons := make([]Consensus, nClusters)
|
||
|
apis := make([][]API, nClusters)
|
||
|
ipfss := make([]IPFSConnector, nClusters)
|
||
|
trackers := make([]PinTracker, nClusters)
|
||
|
mons := make([]PeerMonitor, nClusters)
|
||
|
allocs := make([]PinAllocator, nClusters)
|
||
|
infs := make([]Informer, nClusters)
|
||
|
tracers := make([]Tracer, nClusters)
|
||
|
ipfsMocks := make([]*test.IpfsMock, nClusters)
|
||
|
|
||
|
clusters := make([]*Cluster, nClusters)
|
||
|
|
||
|
// Uncomment when testing with fixed ports
|
||
|
// clusterPeers := make([]ma.Multiaddr, nClusters, nClusters)
|
||
|
|
||
|
hosts, pubsubs, dhts := createHosts(t, testingClusterSecret, nClusters)
|
||
|
|
||
|
for i := 0; i < nClusters; i++ {
|
||
|
// staging = true for all except first (i==0)
|
||
|
cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i], ipfsMocks[i] = createComponents(t, hosts[i], pubsubs[i], dhts[i], i, i != 0)
|
||
|
}
|
||
|
|
||
|
// Start first node
|
||
|
clusters[0] = createCluster(t, hosts[0], dhts[0], cfgs[0], stores[0], cons[0], apis[0], ipfss[0], trackers[0], mons[0], allocs[0], infs[0], tracers[0])
|
||
|
<-clusters[0].Ready()
|
||
|
bootstrapAddr := clusterAddr(clusters[0])
|
||
|
|
||
|
// Start the rest and join
|
||
|
for i := 1; i < nClusters; i++ {
|
||
|
clusters[i] = createCluster(t, hosts[i], dhts[i], cfgs[i], stores[i], cons[i], apis[i], ipfss[i], trackers[i], mons[i], allocs[i], infs[i], tracers[i])
|
||
|
err := clusters[i].Join(ctx, bootstrapAddr)
|
||
|
if err != nil {
|
||
|
logger.Error(err)
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
<-clusters[i].Ready()
|
||
|
}
|
||
|
|
||
|
// connect all hosts
|
||
|
for _, h := range hosts {
|
||
|
for _, h2 := range hosts {
|
||
|
if h.ID() != h2.ID() {
|
||
|
h.Peerstore().AddAddrs(h2.ID(), h2.Addrs(), peerstore.PermanentAddrTTL)
|
||
|
_, err := h.Network().DialPeer(ctx, h2.ID())
|
||
|
if err != nil {
|
||
|
t.Log(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
waitForLeader(t, clusters)
|
||
|
waitForClustersHealthy(t, clusters)
|
||
|
|
||
|
return clusters, ipfsMocks
|
||
|
}
|
||
|
|
||
|
func shutdownClusters(t *testing.T, clusters []*Cluster, m []*test.IpfsMock) {
|
||
|
for i, c := range clusters {
|
||
|
shutdownCluster(t, c, m[i])
|
||
|
}
|
||
|
os.RemoveAll(testsFolder)
|
||
|
}
|
||
|
|
||
|
func shutdownCluster(t *testing.T, c *Cluster, m *test.IpfsMock) {
|
||
|
err := c.Shutdown(context.Background())
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
c.dht.Close()
|
||
|
c.host.Close()
|
||
|
c.datastore.Close()
|
||
|
m.Close()
|
||
|
}
|
||
|
|
||
|
func collectGlobalPinInfos(t *testing.T, out <-chan api.GlobalPinInfo, timeout time.Duration) []api.GlobalPinInfo {
|
||
|
t.Helper()
|
||
|
|
||
|
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||
|
defer cancel()
|
||
|
|
||
|
var gpis []api.GlobalPinInfo
|
||
|
for {
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
t.Error(ctx.Err())
|
||
|
return gpis
|
||
|
case gpi, ok := <-out:
|
||
|
if !ok {
|
||
|
return gpis
|
||
|
}
|
||
|
gpis = append(gpis, gpi)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func collectPinInfos(t *testing.T, out <-chan api.PinInfo) []api.PinInfo {
|
||
|
t.Helper()
|
||
|
|
||
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||
|
defer cancel()
|
||
|
|
||
|
var pis []api.PinInfo
|
||
|
for {
|
||
|
select {
|
||
|
case <-ctx.Done():
|
||
|
t.Error(ctx.Err())
|
||
|
return pis
|
||
|
case pi, ok := <-out:
|
||
|
if !ok {
|
||
|
return pis
|
||
|
}
|
||
|
pis = append(pis, pi)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func runF(t *testing.T, clusters []*Cluster, f func(*testing.T, *Cluster)) {
|
||
|
t.Helper()
|
||
|
var wg sync.WaitGroup
|
||
|
for _, c := range clusters {
|
||
|
wg.Add(1)
|
||
|
go func(c *Cluster) {
|
||
|
defer wg.Done()
|
||
|
f(t, c)
|
||
|
}(c)
|
||
|
|
||
|
}
|
||
|
wg.Wait()
|
||
|
}
|
||
|
|
||
|
//////////////////////////////////////
|
||
|
// Delay and wait functions
|
||
|
//
|
||
|
// Delays are used in tests to wait for certain events to happen:
|
||
|
// * ttlDelay() waits for metrics to arrive. If you pin something
|
||
|
// and your next operation depends on updated metrics, you need to wait
|
||
|
// * pinDelay() accounts for the time necessary to pin something and for the new
|
||
|
// log entry to be visible in all cluster peers
|
||
|
// * delay just sleeps a second or two.
|
||
|
// * waitForLeader functions make sure there is a raft leader, for example,
|
||
|
// after killing the leader.
|
||
|
//
|
||
|
// The values for delays are a result of testing and adjusting so tests pass
|
||
|
// in travis, jenkins etc., taking into account the values used in the
|
||
|
// testing configuration (config_test.go).
|
||
|
func delay() {
|
||
|
var d int
|
||
|
if nClusters > 10 {
|
||
|
d = 3000
|
||
|
} else {
|
||
|
d = 2000
|
||
|
}
|
||
|
time.Sleep(time.Duration(d) * time.Millisecond)
|
||
|
}
|
||
|
|
||
|
// pinDelay sleeps for 800 milliseconds.
func pinDelay() {
	const wait = 800 * time.Millisecond
	time.Sleep(wait)
}
|
||
|
|
||
|
func ttlDelay() {
|
||
|
time.Sleep(ttlDelayTime)
|
||
|
}
|
||
|
|
||
|
// Like waitForLeader but letting metrics expire before waiting, and
|
||
|
// waiting for new metrics to arrive afterwards.
|
||
|
func waitForLeaderAndMetrics(t *testing.T, clusters []*Cluster) {
|
||
|
ttlDelay()
|
||
|
waitForLeader(t, clusters)
|
||
|
ttlDelay()
|
||
|
}
|
||
|
|
||
|
// Makes sure there is a leader and everyone knows about it.
|
||
|
func waitForLeader(t *testing.T, clusters []*Cluster) {
|
||
|
if consensus == "crdt" {
|
||
|
return // yai
|
||
|
}
|
||
|
ctx := context.Background()
|
||
|
timer := time.NewTimer(time.Minute)
|
||
|
ticker := time.NewTicker(100 * time.Millisecond)
|
||
|
|
||
|
loop:
|
||
|
for {
|
||
|
select {
|
||
|
case <-timer.C:
|
||
|
t.Fatal("timed out waiting for a leader")
|
||
|
case <-ticker.C:
|
||
|
for _, cl := range clusters {
|
||
|
if cl.shutdownB {
|
||
|
continue // skip shutdown clusters
|
||
|
}
|
||
|
_, err := cl.consensus.Leader(ctx)
|
||
|
if err != nil {
|
||
|
continue loop
|
||
|
}
|
||
|
}
|
||
|
break loop
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func waitForClustersHealthy(t *testing.T, clusters []*Cluster) {
|
||
|
t.Helper()
|
||
|
if len(clusters) == 0 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
timer := time.NewTimer(15 * time.Second)
|
||
|
for {
|
||
|
ttlDelay()
|
||
|
metrics := clusters[0].monitor.LatestMetrics(context.Background(), clusters[0].informers[0].Name())
|
||
|
healthy := 0
|
||
|
for _, m := range metrics {
|
||
|
if !m.Expired() {
|
||
|
healthy++
|
||
|
}
|
||
|
}
|
||
|
if len(clusters) == healthy {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
select {
|
||
|
case <-timer.C:
|
||
|
t.Fatal("timed out waiting for clusters to be healthy")
|
||
|
default:
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/////////////////////////////////////////
|
||
|
|
||
|
func TestClustersVersion(t *testing.T) {
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
v := c.Version()
|
||
|
if v != version.Version.String() {
|
||
|
t.Error("Bad version")
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersPeers(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
delay()
|
||
|
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
||
|
|
||
|
out := make(chan api.ID, len(clusters))
|
||
|
clusters[j].Peers(ctx, out)
|
||
|
|
||
|
if len(out) != nClusters {
|
||
|
t.Fatal("expected as many peers as clusters")
|
||
|
}
|
||
|
|
||
|
clusterIDMap := make(map[peer.ID]api.ID)
|
||
|
peerIDMap := make(map[peer.ID]api.ID)
|
||
|
|
||
|
for _, c := range clusters {
|
||
|
id := c.ID(ctx)
|
||
|
clusterIDMap[id.ID] = id
|
||
|
}
|
||
|
|
||
|
for p := range out {
|
||
|
if p.Error != "" {
|
||
|
t.Error(p.ID, p.Error)
|
||
|
continue
|
||
|
}
|
||
|
peerIDMap[p.ID] = p
|
||
|
}
|
||
|
|
||
|
for k, id := range clusterIDMap {
|
||
|
id2, ok := peerIDMap[k]
|
||
|
if !ok {
|
||
|
t.Fatal("expected id in both maps")
|
||
|
}
|
||
|
//if !crypto.KeyEqual(id.PublicKey, id2.PublicKey) {
|
||
|
// t.Error("expected same public key")
|
||
|
//}
|
||
|
if id.IPFS.ID != id2.IPFS.ID {
|
||
|
t.Error("expected same ipfs daemon ID")
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestClustersPin(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
prefix := test.Cid1.Prefix()
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
for i := 0; i < nPins; i++ {
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
||
|
h, err := prefix.Sum(randomBytes()) // create random cid
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
_, err = clusters[j].Pin(ctx, api.NewCid(h), api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Errorf("error pinning %s: %s", h, err)
|
||
|
}
|
||
|
// // Test re-pin
|
||
|
// err = clusters[j].Pin(ctx, api.PinCid(h))
|
||
|
// if err != nil {
|
||
|
// t.Errorf("error repinning %s: %s", h, err)
|
||
|
// }
|
||
|
}
|
||
|
switch consensus {
|
||
|
case "crdt":
|
||
|
time.Sleep(10 * time.Second)
|
||
|
default:
|
||
|
delay()
|
||
|
}
|
||
|
fpinned := func(t *testing.T, c *Cluster) {
|
||
|
out := make(chan api.PinInfo, 10)
|
||
|
|
||
|
go func() {
|
||
|
err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
status := collectPinInfos(t, out)
|
||
|
|
||
|
for _, v := range status {
|
||
|
if v.Status != api.TrackerStatusPinned {
|
||
|
t.Errorf("%s should have been pinned but it is %s", v.Cid, v.Status)
|
||
|
}
|
||
|
}
|
||
|
if l := len(status); l != nPins {
|
||
|
t.Errorf("Pinned %d out of %d requests", l, nPins)
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, fpinned)
|
||
|
|
||
|
// Unpin everything
|
||
|
pinList, err := clusters[0].pinsSlice(ctx)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(pinList) != nPins {
|
||
|
t.Fatalf("pin list has %d but pinned %d", len(pinList), nPins)
|
||
|
}
|
||
|
|
||
|
for i := 0; i < len(pinList); i++ {
|
||
|
// test re-unpin fails
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
||
|
_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
|
||
|
if err != nil {
|
||
|
t.Errorf("error unpinning %s: %s", pinList[i].Cid, err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
switch consensus {
|
||
|
case "crdt":
|
||
|
time.Sleep(10 * time.Second)
|
||
|
default:
|
||
|
delay()
|
||
|
}
|
||
|
|
||
|
for i := 0; i < len(pinList); i++ {
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
||
|
_, err := clusters[j].Unpin(ctx, pinList[i].Cid)
|
||
|
if err == nil {
|
||
|
t.Errorf("expected error re-unpinning %s", pinList[i].Cid)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
delay()
|
||
|
|
||
|
funpinned := func(t *testing.T, c *Cluster) {
|
||
|
out := make(chan api.PinInfo)
|
||
|
go func() {
|
||
|
err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
status := collectPinInfos(t, out)
|
||
|
for _, v := range status {
|
||
|
t.Errorf("%s should have been unpinned but it is %s", v.Cid, v.Status)
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, funpinned)
|
||
|
}
|
||
|
|
||
|
func TestClustersPinUpdate(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
prefix := test.Cid1.Prefix()
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
h, _ := prefix.Sum(randomBytes()) // create random cid
|
||
|
h2, _ := prefix.Sum(randomBytes()) // create random cid
|
||
|
|
||
|
_, err := clusters[0].PinUpdate(ctx, api.NewCid(h), api.NewCid(h2), api.PinOptions{})
|
||
|
if err == nil || err != state.ErrNotFound {
|
||
|
t.Fatal("pin update should fail when from is not pinned")
|
||
|
}
|
||
|
|
||
|
_, err = clusters[0].Pin(ctx, api.NewCid(h), api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Errorf("error pinning %s: %s", h, err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
expiry := time.Now().AddDate(1, 0, 0)
|
||
|
opts2 := api.PinOptions{
|
||
|
UserAllocations: []peer.ID{clusters[0].host.ID()}, // should not be used
|
||
|
PinUpdate: api.NewCid(h),
|
||
|
Name: "new name",
|
||
|
ExpireAt: expiry,
|
||
|
}
|
||
|
|
||
|
_, err = clusters[0].Pin(ctx, api.NewCid(h2), opts2) // should call PinUpdate
|
||
|
if err != nil {
|
||
|
t.Errorf("error pin-updating %s: %s", h2, err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
pinget, err := c.PinGet(ctx, api.NewCid(h2))
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(pinget.Allocations) != 0 {
|
||
|
t.Error("new pin should be allocated everywhere like pin1")
|
||
|
}
|
||
|
|
||
|
if pinget.MaxDepth != -1 {
|
||
|
t.Error("updated pin should be recursive like pin1")
|
||
|
}
|
||
|
// We compare Unix seconds because our protobuf serde will have
|
||
|
// lost any sub-second precision.
|
||
|
if pinget.ExpireAt.Unix() != expiry.Unix() {
|
||
|
t.Errorf("Expiry didn't match. Expected: %s. Got: %s", expiry, pinget.ExpireAt)
|
||
|
}
|
||
|
|
||
|
if pinget.Name != "new name" {
|
||
|
t.Error("name should be kept")
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersPinDirect(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
prefix := test.Cid1.Prefix()
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
h, _ := prefix.Sum(randomBytes()) // create random cid
|
||
|
|
||
|
_, err := clusters[0].Pin(ctx, api.NewCid(h), api.PinOptions{Mode: api.PinModeDirect})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster, mode api.PinMode) {
|
||
|
pinget, err := c.PinGet(ctx, api.NewCid(h))
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if pinget.Mode != mode {
|
||
|
t.Error("pin should be pinned in direct mode")
|
||
|
}
|
||
|
|
||
|
if pinget.MaxDepth != mode.ToPinDepth() {
|
||
|
t.Errorf("pin should have max-depth %d but has %d", mode.ToPinDepth(), pinget.MaxDepth)
|
||
|
}
|
||
|
|
||
|
pInfo := c.StatusLocal(ctx, api.NewCid(h))
|
||
|
if pInfo.Error != "" {
|
||
|
t.Error(pInfo.Error)
|
||
|
}
|
||
|
if pInfo.Status != api.TrackerStatusPinned {
|
||
|
t.Error(pInfo.Error)
|
||
|
t.Error("the status should show the hash as pinned")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
runF(t, clusters, func(t *testing.T, c *Cluster) {
|
||
|
f(t, c, api.PinModeDirect)
|
||
|
})
|
||
|
|
||
|
// Convert into a recursive mode
|
||
|
_, err = clusters[0].Pin(ctx, api.NewCid(h), api.PinOptions{Mode: api.PinModeRecursive})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
runF(t, clusters, func(t *testing.T, c *Cluster) {
|
||
|
f(t, c, api.PinModeRecursive)
|
||
|
})
|
||
|
|
||
|
// This should fail as we cannot convert back to direct
|
||
|
_, err = clusters[0].Pin(ctx, api.NewCid(h), api.PinOptions{Mode: api.PinModeDirect})
|
||
|
if err == nil {
|
||
|
t.Error("a recursive pin cannot be converted back to direct pin")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestClustersStatusAll(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
h := test.Cid1
|
||
|
clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
|
||
|
pinDelay()
|
||
|
// Global status
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
out := make(chan api.GlobalPinInfo, 10)
|
||
|
go func() {
|
||
|
err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
statuses := collectGlobalPinInfos(t, out, 5*time.Second)
|
||
|
if len(statuses) != 1 {
|
||
|
t.Fatal("bad status. Expected one item")
|
||
|
}
|
||
|
if !statuses[0].Cid.Equals(h) {
|
||
|
t.Error("bad cid in status")
|
||
|
}
|
||
|
|
||
|
if statuses[0].Name != "test" {
|
||
|
t.Error("globalPinInfo should have the name")
|
||
|
}
|
||
|
|
||
|
info := statuses[0].PeerMap
|
||
|
if len(info) != nClusters {
|
||
|
t.Error("bad info in status")
|
||
|
}
|
||
|
|
||
|
for _, pi := range info {
|
||
|
if pi.IPFS != test.PeerID1 {
|
||
|
t.Error("ipfs not set in pin status")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
pid := peer.Encode(c.host.ID())
|
||
|
if info[pid].Status != api.TrackerStatusPinned {
|
||
|
t.Error("the hash should have been pinned")
|
||
|
}
|
||
|
|
||
|
status, err := c.Status(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
|
||
|
pinfo, ok := status.PeerMap[pid]
|
||
|
if !ok {
|
||
|
t.Fatal("Host not in status")
|
||
|
}
|
||
|
|
||
|
if pinfo.Status != api.TrackerStatusPinned {
|
||
|
t.Error(pinfo.Error)
|
||
|
t.Error("the status should show the hash as pinned")
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersStatusAllWithErrors(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
h := test.Cid1
|
||
|
clusters[0].Pin(ctx, h, api.PinOptions{Name: "test"})
|
||
|
pinDelay()
|
||
|
|
||
|
// shutdown 1 cluster peer
|
||
|
clusters[1].Shutdown(ctx)
|
||
|
clusters[1].host.Close()
|
||
|
delay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
// skip if it's the shutdown peer
|
||
|
if c.ID(ctx).ID == clusters[1].ID(ctx).ID {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
out := make(chan api.GlobalPinInfo, 10)
|
||
|
go func() {
|
||
|
err := c.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
statuses := collectGlobalPinInfos(t, out, 5*time.Second)
|
||
|
|
||
|
if len(statuses) != 1 {
|
||
|
t.Fatal("bad status. Expected one item")
|
||
|
}
|
||
|
|
||
|
if !statuses[0].Cid.Equals(h) {
|
||
|
t.Error("wrong Cid in globalPinInfo")
|
||
|
}
|
||
|
|
||
|
if statuses[0].Name != "test" {
|
||
|
t.Error("wrong Name in globalPinInfo")
|
||
|
}
|
||
|
|
||
|
// Raft and CRDT behave differently here
|
||
|
switch consensus {
|
||
|
case "raft":
|
||
|
// Raft will have all statuses with one of them
|
||
|
// being in ERROR because the peer is off
|
||
|
|
||
|
stts := statuses[0]
|
||
|
if len(stts.PeerMap) != nClusters {
|
||
|
t.Error("bad number of peers in status")
|
||
|
}
|
||
|
|
||
|
pid := peer.Encode(clusters[1].id)
|
||
|
errst := stts.PeerMap[pid]
|
||
|
|
||
|
if errst.Status != api.TrackerStatusClusterError {
|
||
|
t.Error("erroring status should be set to ClusterError:", errst.Status)
|
||
|
}
|
||
|
if errst.PeerName != "peer_1" {
|
||
|
t.Error("peername should have been set in the erroring peer too from the cache")
|
||
|
}
|
||
|
|
||
|
if errst.IPFS != test.PeerID1 {
|
||
|
t.Error("IPFS ID should have been set in the erroring peer too from the cache")
|
||
|
}
|
||
|
|
||
|
// now check with Cid status
|
||
|
status, err := c.Status(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
|
||
|
pinfo := status.PeerMap[pid]
|
||
|
|
||
|
if pinfo.Status != api.TrackerStatusClusterError {
|
||
|
t.Error("erroring status should be ClusterError:", pinfo.Status)
|
||
|
}
|
||
|
|
||
|
if pinfo.PeerName != "peer_1" {
|
||
|
t.Error("peername should have been set in the erroring peer too from the cache")
|
||
|
}
|
||
|
|
||
|
if pinfo.IPFS != test.PeerID1 {
|
||
|
t.Error("IPFS ID should have been set in the erroring peer too from the cache")
|
||
|
}
|
||
|
case "crdt":
|
||
|
// CRDT will not have contacted the offline peer because
|
||
|
// its metric expired and therefore is not in the
|
||
|
// peerset.
|
||
|
if len(statuses[0].PeerMap) != nClusters-1 {
|
||
|
t.Error("expected a different number of statuses")
|
||
|
}
|
||
|
default:
|
||
|
t.Fatal("bad consensus")
|
||
|
|
||
|
}
|
||
|
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersRecoverLocal(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
h := test.ErrorCid // This cid always fails
|
||
|
h2 := test.Cid2
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
clusters[0].Pin(ctx, h2, api.PinOptions{})
|
||
|
pinDelay()
|
||
|
pinDelay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
_, err := c.RecoverLocal(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
// Wait for queue to be processed
|
||
|
delay()
|
||
|
|
||
|
info := c.StatusLocal(ctx, h)
|
||
|
if info.Status != api.TrackerStatusPinError {
|
||
|
t.Errorf("element is %s and not PinError", info.Status)
|
||
|
}
|
||
|
|
||
|
// Recover good ID
|
||
|
info, _ = c.RecoverLocal(ctx, h2)
|
||
|
if info.Status != api.TrackerStatusPinned {
|
||
|
t.Error("element should be in Pinned state")
|
||
|
}
|
||
|
}
|
||
|
// Test Local syncs
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersRecover(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
h := test.ErrorCid // This cid always fails
|
||
|
h2 := test.Cid2
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
clusters[0].Pin(ctx, h2, api.PinOptions{})
|
||
|
|
||
|
pinDelay()
|
||
|
pinDelay()
|
||
|
|
||
|
j := rand.Intn(nClusters)
|
||
|
ginfo, err := clusters[j].Recover(ctx, h)
|
||
|
if err != nil {
|
||
|
// we always attempt to return a valid response
|
||
|
// with errors contained in GlobalPinInfo
|
||
|
t.Fatal("did not expect an error")
|
||
|
}
|
||
|
if len(ginfo.PeerMap) != nClusters {
|
||
|
t.Error("number of peers do not match")
|
||
|
}
|
||
|
// Wait for queue to be processed
|
||
|
delay()
|
||
|
|
||
|
ginfo, err = clusters[j].Status(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinfo, ok := ginfo.PeerMap[peer.Encode(clusters[j].host.ID())]
|
||
|
if !ok {
|
||
|
t.Fatal("should have info for this host")
|
||
|
}
|
||
|
if pinfo.Error == "" {
|
||
|
t.Error("pinInfo error should not be empty")
|
||
|
}
|
||
|
|
||
|
for _, c := range clusters {
|
||
|
inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
|
||
|
if !ok {
|
||
|
t.Fatal("GlobalPinInfo should not be empty for this host")
|
||
|
}
|
||
|
|
||
|
if inf.Status != api.TrackerStatusPinError {
|
||
|
t.Logf("%+v", inf)
|
||
|
t.Error("should be PinError in all peers")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Test with a good Cid
|
||
|
j = rand.Intn(nClusters)
|
||
|
ginfo, err = clusters[j].Recover(ctx, h2)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
if !ginfo.Cid.Equals(h2) {
|
||
|
t.Error("GlobalPinInfo should be for testrCid2")
|
||
|
}
|
||
|
if len(ginfo.PeerMap) != nClusters {
|
||
|
t.Error("number of peers do not match")
|
||
|
}
|
||
|
|
||
|
for _, c := range clusters {
|
||
|
inf, ok := ginfo.PeerMap[peer.Encode(c.host.ID())]
|
||
|
if !ok {
|
||
|
t.Fatal("GlobalPinInfo should have this cluster")
|
||
|
}
|
||
|
if inf.Status != api.TrackerStatusPinned {
|
||
|
t.Error("the GlobalPinInfo should show Pinned in all peers")
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestClustersRecoverAll(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
h1 := test.Cid1
|
||
|
hError := test.ErrorCid
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
clusters[0].Pin(ctx, h1, api.PinOptions{})
|
||
|
clusters[0].Pin(ctx, hError, api.PinOptions{})
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
out := make(chan api.GlobalPinInfo)
|
||
|
go func() {
|
||
|
err := clusters[rand.Intn(nClusters)].RecoverAll(ctx, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
|
||
|
gInfos := collectGlobalPinInfos(t, out, 5*time.Second)
|
||
|
|
||
|
if len(gInfos) != 1 {
|
||
|
t.Error("expected one items")
|
||
|
}
|
||
|
|
||
|
for _, gInfo := range gInfos {
|
||
|
if len(gInfo.PeerMap) != nClusters {
|
||
|
t.Error("number of peers do not match")
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestClustersShutdown(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
err := c.Shutdown(ctx)
|
||
|
if err != nil {
|
||
|
t.Error("should be able to shutdown cleanly")
|
||
|
}
|
||
|
}
|
||
|
// Shutdown 3 times
|
||
|
runF(t, clusters, f)
|
||
|
runF(t, clusters, f)
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersReplicationOverall(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
}
|
||
|
|
||
|
// Why is replication factor nClusters - 1?
|
||
|
// Because that way we know that pinning nCluster
|
||
|
// pins with an strategy like numpins/disk
|
||
|
// will result in each peer holding locally exactly
|
||
|
// nCluster pins.
|
||
|
|
||
|
prefix := test.Cid1.Prefix()
|
||
|
|
||
|
for i := 0; i < nClusters; i++ {
|
||
|
// Pick a random cluster and hash
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer
|
||
|
h, err := prefix.Sum(randomBytes()) // create random cid
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
_, err = clusters[j].Pin(ctx, api.NewCid(h), api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
pinDelay()
|
||
|
|
||
|
// check that it is held by exactly nClusters - 1 peers
|
||
|
gpi, err := clusters[j].Status(ctx, api.NewCid(h))
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
numLocal := 0
|
||
|
numRemote := 0
|
||
|
for _, v := range gpi.PeerMap {
|
||
|
if v.Status == api.TrackerStatusPinned {
|
||
|
numLocal++
|
||
|
} else if v.Status == api.TrackerStatusRemote {
|
||
|
numRemote++
|
||
|
}
|
||
|
}
|
||
|
if numLocal != nClusters-1 {
|
||
|
t.Errorf(
|
||
|
"We wanted replication %d but it's only %d",
|
||
|
nClusters-1,
|
||
|
numLocal,
|
||
|
)
|
||
|
}
|
||
|
|
||
|
if numRemote != 1 {
|
||
|
t.Errorf("We wanted 1 peer track as remote but %d do", numRemote)
|
||
|
}
|
||
|
ttlDelay()
|
||
|
}
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
// confirm that the pintracker state matches the current global state
|
||
|
out := make(chan api.PinInfo, 100)
|
||
|
|
||
|
go func() {
|
||
|
err := c.tracker.StatusAll(ctx, api.TrackerStatusUndefined, out)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
pinfos := collectPinInfos(t, out)
|
||
|
if len(pinfos) != nClusters {
|
||
|
t.Error("Pinfos does not have the expected pins")
|
||
|
}
|
||
|
|
||
|
numRemote := 0
|
||
|
numLocal := 0
|
||
|
for _, pi := range pinfos {
|
||
|
switch pi.Status {
|
||
|
case api.TrackerStatusPinned:
|
||
|
numLocal++
|
||
|
|
||
|
case api.TrackerStatusRemote:
|
||
|
numRemote++
|
||
|
}
|
||
|
}
|
||
|
if numLocal != nClusters-1 {
|
||
|
t.Errorf("%s: Expected %d local pins but got %d", c.id.String(), nClusters-1, numLocal)
|
||
|
}
|
||
|
|
||
|
if numRemote != 1 {
|
||
|
t.Errorf("%s: Expected 1 remote pin but got %d", c.id.String(), numRemote)
|
||
|
}
|
||
|
|
||
|
outPins := make(chan api.Pin)
|
||
|
go func() {
|
||
|
err := c.Pins(ctx, outPins)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}()
|
||
|
for pin := range outPins {
|
||
|
allocs := pin.Allocations
|
||
|
if len(allocs) != nClusters-1 {
|
||
|
t.Errorf("Allocations are [%s]", allocs)
|
||
|
}
|
||
|
for _, a := range allocs {
|
||
|
if a == c.id {
|
||
|
pinfo := c.tracker.Status(ctx, pin.Cid)
|
||
|
if pinfo.Status != api.TrackerStatusPinned {
|
||
|
t.Errorf("Peer %s was allocated but it is not pinning cid", c.id)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
// This test checks that we pin with ReplicationFactorMax when
|
||
|
// we can
|
||
|
func TestClustersReplicationFactorMax(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 3 {
|
||
|
t.Skip("Need at least 3 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
p, err := c.PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(p.Allocations) != nClusters-1 {
|
||
|
t.Error("should have pinned nClusters - 1 allocations")
|
||
|
}
|
||
|
|
||
|
if p.ReplicationFactorMin != 1 {
|
||
|
t.Error("rplMin should be 1")
|
||
|
}
|
||
|
|
||
|
if p.ReplicationFactorMax != nClusters-1 {
|
||
|
t.Error("rplMax should be nClusters-1")
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
// This tests checks that repinning something that is overpinned
|
||
|
// removes some allocations
|
||
|
func TestClustersReplicationFactorMaxLower(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = 1
|
||
|
c.config.ReplicationFactorMax = nClusters
|
||
|
}
|
||
|
|
||
|
ttlDelay() // make sure we have places to pin
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
p1, err := clusters[0].PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(p1.Allocations) != nClusters {
|
||
|
t.Fatal("allocations should be nClusters")
|
||
|
}
|
||
|
|
||
|
opts := api.PinOptions{
|
||
|
ReplicationFactorMin: 1,
|
||
|
ReplicationFactorMax: 2,
|
||
|
}
|
||
|
_, err = clusters[0].Pin(ctx, h, opts)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
p2, err := clusters[0].PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(p2.Allocations) != 2 {
|
||
|
t.Fatal("allocations should have been reduced to 2")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This test checks that when not all nodes are available,
|
||
|
// we pin in as many as we can aiming for ReplicationFactorMax
|
||
|
func TestClustersReplicationFactorInBetween(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = 1
|
||
|
c.config.ReplicationFactorMax = nClusters
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
// Shutdown two peers
|
||
|
clusters[nClusters-1].Shutdown(ctx)
|
||
|
clusters[nClusters-2].Shutdown(ctx)
|
||
|
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
if c == clusters[nClusters-1] || c == clusters[nClusters-2] {
|
||
|
return
|
||
|
}
|
||
|
p, err := c.PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(p.Allocations) != nClusters-2 {
|
||
|
t.Error("should have pinned nClusters-2 allocations")
|
||
|
}
|
||
|
|
||
|
if p.ReplicationFactorMin != 1 {
|
||
|
t.Error("rplMin should be 1")
|
||
|
}
|
||
|
|
||
|
if p.ReplicationFactorMax != nClusters {
|
||
|
t.Error("rplMax should be nClusters")
|
||
|
}
|
||
|
}
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
// This test checks that we do not pin something for which
|
||
|
// we cannot reach ReplicationFactorMin
|
||
|
func TestClustersReplicationFactorMin(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters
|
||
|
}
|
||
|
|
||
|
// Shutdown two peers
|
||
|
clusters[nClusters-1].Shutdown(ctx)
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
clusters[nClusters-2].Shutdown(ctx)
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err == nil {
|
||
|
t.Error("Pin should have failed as rplMin cannot be satisfied")
|
||
|
}
|
||
|
t.Log(err)
|
||
|
if !strings.Contains(err.Error(), "not enough peers to allocate CID") {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This tests checks that repinning something that has becomed
|
||
|
// underpinned actually changes nothing if it's sufficiently pinned
|
||
|
func TestClustersReplicationMinMaxNoRealloc(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = 1
|
||
|
c.config.ReplicationFactorMax = nClusters
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
// Shutdown two peers
|
||
|
clusters[nClusters-1].Shutdown(ctx)
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
clusters[nClusters-2].Shutdown(ctx)
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
_, err = clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
p, err := clusters[0].PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
if len(p.Allocations) != nClusters {
|
||
|
t.Error("allocations should still be nCluster even if not all available")
|
||
|
}
|
||
|
|
||
|
if p.ReplicationFactorMax != nClusters {
|
||
|
t.Error("rplMax should have not changed")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This test checks that repinning something that has becomed
|
||
|
// underpinned does re-allocations when it's not sufficiently
|
||
|
// pinned anymore.
|
||
|
// FIXME: The manual repin only works if the pin options changed.
|
||
|
func TestClustersReplicationMinMaxRealloc(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = 3
|
||
|
c.config.ReplicationFactorMax = 4
|
||
|
}
|
||
|
|
||
|
ttlDelay() // make sure metrics are in
|
||
|
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[0].Pin(ctx, h, api.PinOptions{
|
||
|
Name: "a",
|
||
|
})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
p, err := clusters[0].PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
firstAllocations := p.Allocations
|
||
|
|
||
|
peerIDMap := make(map[peer.ID]*Cluster)
|
||
|
for _, a := range clusters {
|
||
|
peerIDMap[a.id] = a
|
||
|
}
|
||
|
|
||
|
// kill two of the allocations
|
||
|
// Only the first allocated peer (or the second if the first is
|
||
|
// alerting) will automatically repin.
|
||
|
alloc1 := peerIDMap[firstAllocations[1]]
|
||
|
alloc2 := peerIDMap[firstAllocations[2]]
|
||
|
safePeer := peerIDMap[firstAllocations[0]]
|
||
|
|
||
|
alloc1.Shutdown(ctx)
|
||
|
alloc2.Shutdown(ctx)
|
||
|
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
// Repin - (although this should have been taken of as alerts
|
||
|
// happen for the shutdown nodes. We force re-allocation by
|
||
|
// changing the name.
|
||
|
_, err = safePeer.Pin(ctx, h, api.PinOptions{
|
||
|
Name: "b",
|
||
|
})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
p, err = safePeer.PinGet(ctx, h)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
secondAllocations := p.Allocations
|
||
|
|
||
|
strings1 := api.PeersToStrings(firstAllocations)
|
||
|
strings2 := api.PeersToStrings(secondAllocations)
|
||
|
sort.Strings(strings1)
|
||
|
sort.Strings(strings2)
|
||
|
t.Logf("Allocs1: %s", strings1)
|
||
|
t.Logf("Allocs2: %s", strings2)
|
||
|
|
||
|
if fmt.Sprintf("%s", strings1) == fmt.Sprintf("%s", strings2) {
|
||
|
t.Error("allocations should have changed")
|
||
|
}
|
||
|
|
||
|
lenSA := len(secondAllocations)
|
||
|
expected := minInt(nClusters-2, 4)
|
||
|
if lenSA != expected {
|
||
|
t.Errorf("Insufficient reallocation, could have allocated to %d peers but instead only allocated to %d peers", expected, lenSA)
|
||
|
}
|
||
|
|
||
|
if lenSA < 3 {
|
||
|
t.Error("allocations should be more than rplMin")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// In this test we check that repinning something
|
||
|
// when a node has gone down will re-assign the pin
|
||
|
func TestClustersReplicationRealloc(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
j := rand.Intn(nClusters)
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// Let the pin arrive
|
||
|
pinDelay()
|
||
|
|
||
|
pinList, err := clusters[j].pinsSlice(ctx)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
pin := pinList[0]
|
||
|
allocs := sort.StringSlice(api.PeersToStrings(pin.Allocations))
|
||
|
allocs.Sort()
|
||
|
allocsStr := fmt.Sprintf("%s", allocs)
|
||
|
|
||
|
// Re-pin should work and be allocated to the same
|
||
|
// nodes
|
||
|
_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
pinList2, err := clusters[j].pinsSlice(ctx)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
pin2 := pinList2[0]
|
||
|
allocs2 := sort.StringSlice(api.PeersToStrings(pin2.Allocations))
|
||
|
allocs2.Sort()
|
||
|
allocsStr2 := fmt.Sprintf("%s", allocs2)
|
||
|
if allocsStr != allocsStr2 {
|
||
|
t.Fatal("allocations changed without reason")
|
||
|
}
|
||
|
//t.Log(allocsStr)
|
||
|
//t.Log(allocsStr2)
|
||
|
|
||
|
var killedClusterIndex int
|
||
|
// find someone that pinned it and kill that cluster
|
||
|
for i, c := range clusters {
|
||
|
pinfo := c.tracker.Status(ctx, h)
|
||
|
if pinfo.Status == api.TrackerStatusPinned {
|
||
|
//t.Logf("Killing %s", c.id.Pretty())
|
||
|
killedClusterIndex = i
|
||
|
t.Logf("Shutting down %s", c.ID(ctx).ID)
|
||
|
c.Shutdown(ctx)
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// let metrics expire and give time for the cluster to
|
||
|
// see if they have lost the leader
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
// Make sure we haven't killed our randomly
|
||
|
// selected cluster
|
||
|
for j == killedClusterIndex {
|
||
|
j = rand.Intn(nClusters)
|
||
|
}
|
||
|
|
||
|
// now pin should succeed
|
||
|
_, err = clusters[j].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
numPinned := 0
|
||
|
for i, c := range clusters {
|
||
|
if i == killedClusterIndex {
|
||
|
continue
|
||
|
}
|
||
|
pinfo := c.tracker.Status(ctx, h)
|
||
|
if pinfo.Status == api.TrackerStatusPinned {
|
||
|
//t.Log(pinfo.Peer.Pretty())
|
||
|
numPinned++
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if numPinned != nClusters-1 {
|
||
|
t.Error("pin should have been correctly re-assigned")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// In this test we try to pin something when there are not
|
||
|
// as many available peers a we need. It's like before, except
|
||
|
// more peers are killed.
|
||
|
func TestClustersReplicationNotEnoughPeers(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
j := rand.Intn(nClusters)
|
||
|
_, err := clusters[j].Pin(ctx, test.Cid1, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// Let the pin arrive
|
||
|
pinDelay()
|
||
|
|
||
|
clusters[0].Shutdown(ctx)
|
||
|
clusters[1].Shutdown(ctx)
|
||
|
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
_, err = clusters[2].Pin(ctx, test.Cid2, api.PinOptions{})
|
||
|
if err == nil {
|
||
|
t.Fatal("expected an error")
|
||
|
}
|
||
|
if !strings.Contains(err.Error(), "not enough peers to allocate") {
|
||
|
t.Error("different error than expected")
|
||
|
t.Error(err)
|
||
|
}
|
||
|
//t.Log(err)
|
||
|
}
|
||
|
|
||
|
func TestClustersRebalanceOnPeerDown(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
}
|
||
|
|
||
|
// pin something
|
||
|
h := test.Cid1
|
||
|
clusters[0].Pin(ctx, h, api.PinOptions{})
|
||
|
pinDelay()
|
||
|
pinLocal := 0
|
||
|
pinRemote := 0
|
||
|
var localPinner string
|
||
|
var remotePinner string
|
||
|
var remotePinnerCluster *Cluster
|
||
|
|
||
|
status, _ := clusters[0].Status(ctx, h)
|
||
|
|
||
|
// check it was correctly pinned
|
||
|
for p, pinfo := range status.PeerMap {
|
||
|
if pinfo.Status == api.TrackerStatusPinned {
|
||
|
pinLocal++
|
||
|
localPinner = p
|
||
|
} else if pinfo.Status == api.TrackerStatusRemote {
|
||
|
pinRemote++
|
||
|
remotePinner = p
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if pinLocal != nClusters-1 || pinRemote != 1 {
|
||
|
t.Fatal("Not pinned as expected")
|
||
|
}
|
||
|
|
||
|
// kill the local pinner
|
||
|
for _, c := range clusters {
|
||
|
clid := peer.Encode(c.id)
|
||
|
if clid == localPinner {
|
||
|
c.Shutdown(ctx)
|
||
|
} else if clid == remotePinner {
|
||
|
remotePinnerCluster = c
|
||
|
}
|
||
|
}
|
||
|
|
||
|
delay()
|
||
|
waitForLeaderAndMetrics(t, clusters) // in case we killed the leader
|
||
|
|
||
|
// It should be now pinned in the remote pinner
|
||
|
if s := remotePinnerCluster.tracker.Status(ctx, h).Status; s != api.TrackerStatusPinned {
|
||
|
t.Errorf("it should be pinned and is %s", s)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Helper function for verifying cluster graph. Will only pass if exactly the
|
||
|
// peers in clusterIDs are fully connected to each other and the expected ipfs
|
||
|
// mock connectivity exists. Cluster peers not in clusterIDs are assumed to
|
||
|
// be disconnected and the graph should reflect this
|
||
|
func validateClusterGraph(t *testing.T, graph api.ConnectGraph, clusterIDs map[string]struct{}, peerNum int) {
|
||
|
// Check that all cluster peers see each other as peers
|
||
|
for id1, peers := range graph.ClusterLinks {
|
||
|
if _, ok := clusterIDs[id1]; !ok {
|
||
|
if len(peers) != 0 {
|
||
|
t.Errorf("disconnected peer %s is still connected in graph", id1)
|
||
|
}
|
||
|
continue
|
||
|
}
|
||
|
t.Logf("id: %s, peers: %v\n", id1, peers)
|
||
|
if len(peers) > len(clusterIDs)-1 {
|
||
|
t.Errorf("More peers recorded in graph than expected")
|
||
|
}
|
||
|
// Make lookup index for peers connected to id1
|
||
|
peerIndex := make(map[string]struct{})
|
||
|
for _, p := range peers {
|
||
|
peerIndex[peer.Encode(p)] = struct{}{}
|
||
|
}
|
||
|
for id2 := range clusterIDs {
|
||
|
if _, ok := peerIndex[id2]; id1 != id2 && !ok {
|
||
|
t.Errorf("Expected graph to see peer %s connected to peer %s", id1, id2)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if len(graph.ClusterLinks) != peerNum {
|
||
|
t.Errorf("Unexpected number of cluster nodes in graph")
|
||
|
}
|
||
|
|
||
|
// Check that all cluster peers are recorded as nodes in the graph
|
||
|
for id := range clusterIDs {
|
||
|
if _, ok := graph.ClusterLinks[id]; !ok {
|
||
|
t.Errorf("Expected graph to record peer %s as a node", id)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if len(graph.ClusterTrustLinks) != peerNum {
|
||
|
t.Errorf("Unexpected number of trust links in graph")
|
||
|
}
|
||
|
|
||
|
// Check that the mocked ipfs swarm is recorded
|
||
|
if len(graph.IPFSLinks) != 1 {
|
||
|
t.Error("Expected exactly one ipfs peer for all cluster nodes, the mocked peer")
|
||
|
}
|
||
|
links, ok := graph.IPFSLinks[peer.Encode(test.PeerID1)]
|
||
|
if !ok {
|
||
|
t.Error("Expected the mocked ipfs peer to be a node in the graph")
|
||
|
} else {
|
||
|
if len(links) != 2 || links[0] != test.PeerID4 ||
|
||
|
links[1] != test.PeerID5 {
|
||
|
t.Error("Swarm peers of mocked ipfs are not those expected")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Check that the cluster to ipfs connections are all recorded
|
||
|
for id := range clusterIDs {
|
||
|
if ipfsID, ok := graph.ClustertoIPFS[id]; !ok {
|
||
|
t.Errorf("Expected graph to record peer %s's ipfs connection", id)
|
||
|
} else {
|
||
|
if ipfsID != test.PeerID1 {
|
||
|
t.Errorf("Unexpected error %s", ipfsID)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
if len(graph.ClustertoIPFS) > len(clusterIDs) {
|
||
|
t.Error("More cluster to ipfs links recorded in graph than expected")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// In this test we get a cluster graph report from a random peer in a healthy
|
||
|
// fully connected cluster and verify that it is formed as expected.
|
||
|
func TestClustersGraphConnected(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer to query
|
||
|
graph, err := clusters[j].ConnectGraph()
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
clusterIDs := make(map[string]struct{})
|
||
|
for _, c := range clusters {
|
||
|
id := peer.Encode(c.ID(ctx).ID)
|
||
|
clusterIDs[id] = struct{}{}
|
||
|
}
|
||
|
validateClusterGraph(t, graph, clusterIDs, nClusters)
|
||
|
}
|
||
|
|
||
|
// Similar to the previous test we get a cluster graph report from a peer.
|
||
|
// However now 2 peers have been shutdown and so we do not expect to see
|
||
|
// them in the graph
|
||
|
func TestClustersGraphUnhealthy(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
if nClusters < 5 {
|
||
|
t.Skip("Need at least 5 peers")
|
||
|
}
|
||
|
|
||
|
j := rand.Intn(nClusters) // choose a random cluster peer to query
|
||
|
// chose the clusters to shutdown
|
||
|
discon1 := -1
|
||
|
discon2 := -1
|
||
|
for i := range clusters {
|
||
|
if i != j {
|
||
|
if discon1 == -1 {
|
||
|
discon1 = i
|
||
|
} else {
|
||
|
discon2 = i
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
clusters[discon1].Shutdown(ctx)
|
||
|
clusters[discon1].host.Close()
|
||
|
clusters[discon2].Shutdown(ctx)
|
||
|
clusters[discon2].host.Close()
|
||
|
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
graph, err := clusters[j].ConnectGraph()
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
clusterIDs := make(map[string]struct{})
|
||
|
for i, c := range clusters {
|
||
|
if i == discon1 || i == discon2 {
|
||
|
continue
|
||
|
}
|
||
|
id := peer.Encode(c.ID(ctx).ID)
|
||
|
clusterIDs[id] = struct{}{}
|
||
|
}
|
||
|
peerNum := nClusters
|
||
|
switch consensus {
|
||
|
case "crdt":
|
||
|
peerNum = nClusters - 2
|
||
|
}
|
||
|
|
||
|
validateClusterGraph(t, graph, clusterIDs, peerNum)
|
||
|
}
|
||
|
|
||
|
// Check that the pin is not re-assigned when a node
|
||
|
// that has disabled repinning goes down.
|
||
|
func TestClustersDisabledRepinning(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
for _, c := range clusters {
|
||
|
c.config.ReplicationFactorMin = nClusters - 1
|
||
|
c.config.ReplicationFactorMax = nClusters - 1
|
||
|
c.config.DisableRepinning = true
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
j := rand.Intn(nClusters)
|
||
|
h := test.Cid1
|
||
|
_, err := clusters[j].Pin(ctx, h, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// Let the pin arrive
|
||
|
pinDelay()
|
||
|
|
||
|
var killedClusterIndex int
|
||
|
// find someone that pinned it and kill that cluster
|
||
|
for i, c := range clusters {
|
||
|
pinfo := c.tracker.Status(ctx, h)
|
||
|
if pinfo.Status == api.TrackerStatusPinned {
|
||
|
killedClusterIndex = i
|
||
|
t.Logf("Shutting down %s", c.ID(ctx).ID)
|
||
|
c.Shutdown(ctx)
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// let metrics expire and give time for the cluster to
|
||
|
// see if they have lost the leader
|
||
|
waitForLeaderAndMetrics(t, clusters)
|
||
|
|
||
|
// Make sure we haven't killed our randomly
|
||
|
// selected cluster
|
||
|
for j == killedClusterIndex {
|
||
|
j = rand.Intn(nClusters)
|
||
|
}
|
||
|
|
||
|
numPinned := 0
|
||
|
for i, c := range clusters {
|
||
|
if i == killedClusterIndex {
|
||
|
continue
|
||
|
}
|
||
|
pinfo := c.tracker.Status(ctx, h)
|
||
|
if pinfo.Status == api.TrackerStatusPinned {
|
||
|
//t.Log(pinfo.Peer.Pretty())
|
||
|
numPinned++
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if numPinned != nClusters-2 {
|
||
|
t.Errorf("expected %d replicas for pin, got %d", nClusters-2, numPinned)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestRepoGC(t *testing.T) {
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
f := func(t *testing.T, c *Cluster) {
|
||
|
gRepoGC, err := c.RepoGC(context.Background())
|
||
|
if err != nil {
|
||
|
t.Fatal("gc should have worked:", err)
|
||
|
}
|
||
|
|
||
|
if gRepoGC.PeerMap == nil {
|
||
|
t.Fatal("expected a non-nil peer map")
|
||
|
}
|
||
|
|
||
|
if len(gRepoGC.PeerMap) != nClusters {
|
||
|
t.Errorf("expected repo gc information for %d peer", nClusters)
|
||
|
}
|
||
|
for _, repoGC := range gRepoGC.PeerMap {
|
||
|
testRepoGC(t, repoGC)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
runF(t, clusters, f)
|
||
|
}
|
||
|
|
||
|
func TestClustersFollowerMode(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
_, err := clusters[0].Pin(ctx, test.Cid1, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
_, err = clusters[0].Pin(ctx, test.ErrorCid, api.PinOptions{})
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
|
||
|
// Let the pins arrive
|
||
|
pinDelay()
|
||
|
|
||
|
// Set Cluster1 to follower mode
|
||
|
clusters[1].config.FollowerMode = true
|
||
|
|
||
|
t.Run("follower cannot pin", func(t *testing.T) {
|
||
|
_, err := clusters[1].PinPath(ctx, "/ipfs/"+test.Cid2.String(), api.PinOptions{})
|
||
|
if err != errFollowerMode {
|
||
|
t.Error("expected follower mode error")
|
||
|
}
|
||
|
_, err = clusters[1].Pin(ctx, test.Cid2, api.PinOptions{})
|
||
|
if err != errFollowerMode {
|
||
|
t.Error("expected follower mode error")
|
||
|
}
|
||
|
})
|
||
|
|
||
|
t.Run("follower cannot unpin", func(t *testing.T) {
|
||
|
_, err := clusters[1].UnpinPath(ctx, "/ipfs/"+test.Cid1.String())
|
||
|
if err != errFollowerMode {
|
||
|
t.Error("expected follower mode error")
|
||
|
}
|
||
|
_, err = clusters[1].Unpin(ctx, test.Cid1)
|
||
|
if err != errFollowerMode {
|
||
|
t.Error("expected follower mode error")
|
||
|
}
|
||
|
})
|
||
|
|
||
|
t.Run("follower cannot add", func(t *testing.T) {
|
||
|
sth := test.NewShardingTestHelper()
|
||
|
defer sth.Clean(t)
|
||
|
params := api.DefaultAddParams()
|
||
|
params.Shard = false
|
||
|
params.Name = "testlocal"
|
||
|
mfr, closer := sth.GetTreeMultiReader(t)
|
||
|
defer closer.Close()
|
||
|
r := multipart.NewReader(mfr, mfr.Boundary())
|
||
|
_, err = clusters[1].AddFile(ctx, r, params)
|
||
|
if err != errFollowerMode {
|
||
|
t.Error("expected follower mode error")
|
||
|
}
|
||
|
})
|
||
|
|
||
|
t.Run("follower status itself only", func(t *testing.T) {
|
||
|
gpi, err := clusters[1].Status(ctx, test.Cid1)
|
||
|
if err != nil {
|
||
|
t.Error("status should work")
|
||
|
}
|
||
|
if len(gpi.PeerMap) != 1 {
|
||
|
t.Fatal("globalPinInfo should only have one peer")
|
||
|
}
|
||
|
})
|
||
|
}
|
||
|
|
||
|
func TestClusterPinsWithExpiration(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
cl := clusters[rand.Intn(nClusters)] // choose a random cluster peer to query
|
||
|
|
||
|
c := test.Cid1
|
||
|
expireIn := 1 * time.Second
|
||
|
opts := api.PinOptions{
|
||
|
ExpireAt: time.Now().Add(expireIn),
|
||
|
}
|
||
|
_, err := cl.Pin(ctx, c, opts)
|
||
|
if err != nil {
|
||
|
t.Fatal("pin should have worked:", err)
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
pins, err := cl.pinsSlice(ctx)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
if len(pins) != 1 {
|
||
|
t.Error("pin should be part of the state")
|
||
|
}
|
||
|
|
||
|
// wait till expiry time
|
||
|
time.Sleep(expireIn)
|
||
|
|
||
|
// manually call state sync on all peers, so we don't have to wait till
|
||
|
// state sync interval
|
||
|
for _, c := range clusters {
|
||
|
err = c.StateSync(ctx)
|
||
|
if err != nil {
|
||
|
t.Error(err)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
pinDelay()
|
||
|
|
||
|
// state sync should have unpinned expired pin
|
||
|
pins, err = cl.pinsSlice(ctx)
|
||
|
if err != nil {
|
||
|
t.Fatal(err)
|
||
|
}
|
||
|
if len(pins) != 0 {
|
||
|
t.Error("pin should not be part of the state")
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func TestClusterAlerts(t *testing.T) {
|
||
|
ctx := context.Background()
|
||
|
clusters, mock := createClusters(t)
|
||
|
defer shutdownClusters(t, clusters, mock)
|
||
|
|
||
|
if len(clusters) < 2 {
|
||
|
t.Skip("need at least 2 nodes for this test")
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
for _, c := range clusters[1:] {
|
||
|
c.Shutdown(ctx)
|
||
|
}
|
||
|
|
||
|
ttlDelay()
|
||
|
|
||
|
alerts := clusters[0].Alerts()
|
||
|
if len(alerts) == 0 {
|
||
|
t.Error("expected at least one alert")
|
||
|
}
|
||
|
}
|