Use rate.Limiter for bandwidth monitoring (#12506)

Bonus: fixes a hang when bandwidth caps are enabled for
synchronous replication
This commit is contained in:
Poorna Krishnamoorthy 2021-06-24 18:29:30 -07:00 committed by GitHub
parent 8d1bc65757
commit d00783c923
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 419 additions and 527 deletions

View file

@ -205,6 +205,12 @@ func (a adminAPIHandlers) SetRemoteTargetHandler(w http.ResponseWriter, r *http.
}
target = tgt
}
// enforce minimum bandwidth limit as 100MBps
if target.BandwidthLimit < 100*1000*1000 {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErrWithErr(ErrReplicationBandwidthLimitError, err), r.URL)
return
}
if err = globalBucketTargetSys.SetTarget(ctx, bucket, &target, update); err != nil {
switch err.(type) {
case BucketRemoteConnectionErr:

View file

@ -115,6 +115,7 @@ const (
ErrReplicationDestinationMissingLock
ErrRemoteTargetNotFoundError
ErrReplicationRemoteConnectionError
ErrReplicationBandwidthLimitError
ErrBucketRemoteIdenticalToSource
ErrBucketRemoteAlreadyExists
ErrBucketRemoteLabelInUse
@ -860,6 +861,11 @@ var errorCodes = errorCodeMap{
Description: "Remote service connection error - please check remote service credentials and target bucket",
HTTPStatusCode: http.StatusNotFound,
},
ErrReplicationBandwidthLimitError: {
Code: "XMinioAdminReplicationBandwidthLimitError",
Description: "Bandwidth limit for remote target must be atleast 100MBps",
HTTPStatusCode: http.StatusBadRequest,
},
ErrReplicationNoMatchingRuleError: {
Code: "XMinioReplicationNoMatchingRule",
Description: "No matching replication rule found for this object prefix",

File diff suppressed because one or more lines are too long

View file

@ -733,18 +733,6 @@ func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI Obje
logger.LogIf(ctx, fmt.Errorf("Unable to replicate metadata for object %s/%s(%s): %s", bucket, objInfo.Name, objInfo.VersionID, err))
}
} else {
target, err := globalBucketMetadataSys.GetBucketTarget(bucket, cfg.RoleArn)
if err != nil {
logger.LogIf(ctx, fmt.Errorf("failed to get target for replication bucket:%s cfg:%s err:%s", bucket, cfg.RoleArn, err))
sendEvent(eventArgs{
EventName: event.ObjectReplicationNotTracked,
BucketName: bucket,
Object: objInfo,
Host: "Internal: [Replication]",
})
return
}
putOpts, err := putReplicationOpts(ctx, dest, objInfo)
if err != nil {
logger.LogIf(ctx, fmt.Errorf("failed to get target for replication bucket:%s cfg:%s err:%w", bucket, cfg.RoleArn, err))
@ -756,28 +744,18 @@ func replicateObject(ctx context.Context, ri ReplicateObjectInfo, objectAPI Obje
})
return
}
// Setup bandwidth throttling
peers, _ := globalEndpoints.peers()
totalNodesCount := len(peers)
if totalNodesCount == 0 {
totalNodesCount = 1 // For standalone erasure coding
}
var headerSize int
for k, v := range putOpts.Header() {
headerSize += len(k) + len(v)
}
opts := &bandwidth.MonitorReaderOptions{
Bucket: objInfo.Bucket,
Object: objInfo.Name,
HeaderSize: headerSize,
BandwidthBytesPerSec: target.BandwidthLimit / int64(totalNodesCount),
ClusterBandwidth: target.BandwidthLimit,
Bucket: objInfo.Bucket,
HeaderSize: headerSize,
}
r := bandwidth.NewMonitoredReader(ctx, globalBucketMonitor, gr, opts)
newCtx, cancel := context.WithTimeout(ctx, globalOperationTimeout.Timeout())
defer cancel()
r := bandwidth.NewMonitoredReader(newCtx, globalBucketMonitor, gr, opts)
if _, err = c.PutObject(ctx, dest.Bucket, object, r, size, "", "", putOpts); err != nil {
replicationStatus = replication.Failed
logger.LogIf(ctx, fmt.Errorf("Unable to replicate for object %s/%s(%s): %s", bucket, objInfo.Name, objInfo.VersionID, err))

View file

@ -131,9 +131,6 @@ func (sys *BucketTargetSys) SetTarget(ctx context.Context, bucket string, tgt *m
if vcfg.Status != string(versioning.Enabled) {
return BucketRemoteTargetNotVersioned{Bucket: tgt.TargetBucket}
}
if tgt.ReplicationSync && tgt.BandwidthLimit > 0 {
return NotImplemented{Message: "Synchronous replication does not support bandwidth limits"}
}
}
sys.Lock()
defer sys.Unlock()
@ -159,9 +156,23 @@ func (sys *BucketTargetSys) SetTarget(ctx context.Context, bucket string, tgt *m
sys.targetsMap[bucket] = newtgts
sys.arnRemotesMap[tgt.Arn] = clnt
sys.updateBandwidthLimit(bucket, tgt.BandwidthLimit)
return nil
}
// updateBandwidthLimit applies a remote target's bandwidth limit for bucket
// to the global bucket monitor. It does nothing in gateway mode. A limit of
// zero, or a negative (invalid) value, is treated as "no limit" and removes
// the bucket from the monitor; any positive limit is installed as the
// bucket's bandwidth limit.
func (sys *BucketTargetSys) updateBandwidthLimit(bucket string, limit int64) {
	if globalIsGateway {
		return
	}
	if limit <= 0 {
		// Negative values must never reach the limiter setup, where they
		// would yield a nonsensical rate and burst.
		globalBucketMonitor.DeleteBucket(bucket)
		return
	}
	// Setup bandwidth throttling
	globalBucketMonitor.SetBandwidthLimit(bucket, limit)
}
// RemoveTarget - removes a remote bucket target for this source bucket.
func (sys *BucketTargetSys) RemoveTarget(ctx context.Context, bucket, arnStr string) error {
if globalIsGateway {
@ -214,6 +225,7 @@ func (sys *BucketTargetSys) RemoveTarget(ctx context.Context, bucket, arnStr str
}
sys.targetsMap[bucket] = targets
delete(sys.arnRemotesMap, arnStr)
sys.updateBandwidthLimit(bucket, 0)
return nil
}
@ -278,6 +290,7 @@ func (sys *BucketTargetSys) UpdateAllTargets(bucket string, tgts *madmin.BucketT
}
}
delete(sys.targetsMap, bucket)
sys.updateBandwidthLimit(bucket, 0)
return
}
@ -290,6 +303,7 @@ func (sys *BucketTargetSys) UpdateAllTargets(bucket string, tgts *madmin.BucketT
continue
}
sys.arnRemotesMap[tgt.Arn] = tgtClient
sys.updateBandwidthLimit(bucket, tgt.BandwidthLimit)
}
sys.targetsMap[bucket] = tgts.Targets
}
@ -315,6 +329,7 @@ func (sys *BucketTargetSys) load(ctx context.Context, buckets []BucketInfo, objA
continue
}
sys.arnRemotesMap[tgt.Arn] = tgtClient
sys.updateBandwidthLimit(bucket.Name, tgt.BandwidthLimit)
}
sys.targetsMap[bucket.Name] = cfg.Targets
}

View file

@ -214,7 +214,7 @@ func newAllSubsystems() {
}
// Create the bucket bandwidth monitor
globalBucketMonitor = bandwidth.NewMonitor(GlobalServiceDoneCh)
globalBucketMonitor = bandwidth.NewMonitor(GlobalContext, totalNodeCount())
// Create a new config system.
globalConfigSys = NewConfigSys()

View file

@ -930,3 +930,13 @@ func loadAndResetRPCNetworkErrsCounter() uint64 {
defer rest.ResetNetworkErrsCounter()
return rest.GetNetworkErrsCounter()
}
// totalNodeCount returns the number of nodes in the cluster as reported by
// globalEndpoints.peers(). The result is never zero: when no peers are
// reported it falls back to 1.
func totalNodeCount() uint64 {
	peers, _ := globalEndpoints.peers()
	if n := uint64(len(peers)); n != 0 {
		return n
	}
	return 1 // For standalone erasure coding
}

1
go.mod
View file

@ -82,6 +82,7 @@ require (
go.uber.org/zap v1.16.1-0.20210329175301-c23abee72d19
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b
golang.org/x/sys v0.0.0-20210510120138-977fb7262007
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba
golang.org/x/tools v0.1.1 // indirect
google.golang.org/api v0.31.0
gopkg.in/yaml.v2 v2.4.0

View file

@ -23,48 +23,47 @@ import (
"time"
"github.com/minio/madmin-go"
"golang.org/x/time/rate"
)
// throttleBandwidth gets the throttle for bucket with the configured value
func (m *Monitor) throttleBandwidth(ctx context.Context, bucket string, bandwidthBytesPerSecond int64, clusterBandwidth int64) *throttle {
m.lock.Lock()
defer m.lock.Unlock()
throttle, ok := m.bucketThrottle[bucket]
if !ok {
throttle = newThrottle(ctx, bandwidthBytesPerSecond, clusterBandwidth)
m.bucketThrottle[bucket] = throttle
return throttle
}
throttle.SetBandwidth(bandwidthBytesPerSecond, clusterBandwidth)
return throttle
type throttle struct {
*rate.Limiter
NodeBandwidthPerSec int64
}
// Monitor implements the monitoring for bandwidth measurements.
// Monitor holds the state of the global bucket monitor
type Monitor struct {
lock sync.Mutex // lock for all updates
activeBuckets map[string]*bucketMeasurement // Buckets with objects in flight
bucketMovingAvgTicker *time.Ticker // Ticker for calculating moving averages
bucketThrottle map[string]*throttle
doneCh <-chan struct{}
tlock sync.RWMutex // mutex for bucketThrottle
bucketThrottle map[string]*throttle
mlock sync.RWMutex // mutex for activeBuckets map
activeBuckets map[string]*bucketMeasurement // Buckets with objects in flight
bucketMovingAvgTicker *time.Ticker // Ticker for calculating moving averages
ctx context.Context // Context for generate
NodeCount uint64
}
// NewMonitor returns a monitor with defaults.
func NewMonitor(doneCh <-chan struct{}) *Monitor {
//NewMonitor returns a monitor with defaults.
func NewMonitor(ctx context.Context, numNodes uint64) *Monitor {
m := &Monitor{
activeBuckets: make(map[string]*bucketMeasurement),
bucketMovingAvgTicker: time.NewTicker(2 * time.Second),
bucketThrottle: make(map[string]*throttle),
doneCh: doneCh,
bucketMovingAvgTicker: time.NewTicker(2 * time.Second),
ctx: ctx,
NodeCount: numNodes,
}
go m.trackEWMA()
return m
}
// SelectionFunction for buckets
// updateMeasurement adds bytes to the measurement of bucket. Buckets that
// are not present in activeBuckets are ignored.
func (m *Monitor) updateMeasurement(bucket string, bytes uint64) {
	m.mlock.Lock()
	defer m.mlock.Unlock()

	measurement, tracked := m.activeBuckets[bucket]
	if !tracked {
		return
	}
	measurement.incrementBytes(bytes)
}
// SelectionFunction decides whether a bucket is included in a bandwidth report.
type SelectionFunction func(bucket string) bool
// SelectBuckets will select all the buckets passed in.
@ -86,8 +85,8 @@ func SelectBuckets(buckets ...string) SelectionFunction {
// GetReport gets the report for all bucket bandwidth details.
func (m *Monitor) GetReport(selectBucket SelectionFunction) *madmin.BucketBandwidthReport {
m.lock.Lock()
defer m.lock.Unlock()
m.mlock.RLock()
defer m.mlock.RUnlock()
return m.getReport(selectBucket)
}
@ -99,29 +98,38 @@ func (m *Monitor) getReport(selectBucket SelectionFunction) *madmin.BucketBandwi
if !selectBucket(bucket) {
continue
}
m.tlock.RLock()
bucketThrottle, ok := m.bucketThrottle[bucket]
if !ok {
continue
}
report.BucketStats[bucket] = madmin.BandwidthDetails{
LimitInBytesPerSecond: bucketThrottle.clusterBandwidth,
CurrentBandwidthInBytesPerSecond: bucketMeasurement.getExpMovingAvgBytesPerSecond(),
if ok {
report.BucketStats[bucket] = madmin.BandwidthDetails{
LimitInBytesPerSecond: bucketThrottle.NodeBandwidthPerSec * int64(m.NodeCount),
CurrentBandwidthInBytesPerSecond: bucketMeasurement.getExpMovingAvgBytesPerSecond(),
}
}
m.tlock.RUnlock()
}
return report
}
func (m *Monitor) trackEWMA() {
for {
select {
case <-m.bucketMovingAvgTicker.C:
m.updateMovingAvg()
case <-m.doneCh:
case <-m.ctx.Done():
return
}
}
}
// updateMovingAvg refreshes the exponential moving average of every bucket
// in activeBuckets, holding the measurement lock for the whole pass.
func (m *Monitor) updateMovingAvg() {
	m.mlock.Lock()
	defer m.mlock.Unlock()

	for name := range m.activeBuckets {
		m.activeBuckets[name].updateExponentialMovingAverage(time.Now())
	}
}
func (m *Monitor) getBucketMeasurement(bucket string, initTime time.Time) *bucketMeasurement {
bucketTracker, ok := m.activeBuckets[bucket]
if !ok {
@ -131,25 +139,43 @@ func (m *Monitor) getBucketMeasurement(bucket string, initTime time.Time) *bucke
return bucketTracker
}
func (m *Monitor) updateMovingAvg() {
m.lock.Lock()
defer m.lock.Unlock()
for _, bucketMeasurement := range m.activeBuckets {
bucketMeasurement.updateExponentialMovingAverage(time.Now())
}
}
// track returns the measurement object for bucket and object
func (m *Monitor) track(bucket string, object string) *bucketMeasurement {
m.lock.Lock()
defer m.lock.Unlock()
return m.getBucketMeasurement(bucket, time.Now())
// track looks up the measurement for bucket through getBucketMeasurement
// while holding the measurement lock. Nothing is returned; the call is made
// only for whatever getBucketMeasurement does to activeBuckets.
// NOTE(review): presumably that registers a missing bucket — confirm.
func (m *Monitor) track(bucket string) {
	m.mlock.Lock()
	defer m.mlock.Unlock()

	now := time.Now()
	_ = m.getBucketMeasurement(bucket, now)
}
// DeleteBucket deletes monitoring the 'bucket'
func (m *Monitor) DeleteBucket(bucket string) {
m.lock.Lock()
defer m.lock.Unlock()
delete(m.activeBuckets, bucket)
m.tlock.Lock()
delete(m.bucketThrottle, bucket)
m.tlock.Unlock()
m.mlock.Lock()
delete(m.activeBuckets, bucket)
m.mlock.Unlock()
}
// throttle looks up the throttle currently configured for bucket under the
// throttle read lock. The result is nil when bucketThrottle has no entry for
// the bucket.
func (m *Monitor) throttle(bucket string) *throttle {
	m.tlock.RLock()
	configured := m.bucketThrottle[bucket]
	m.tlock.RUnlock()
	return configured
}
// SetBandwidthLimit sets the cluster-wide bandwidth limit, in bytes per
// second, for a bucket. The limit is divided evenly by NodeCount and the
// resulting per-node share is used both as the refill rate and as the burst
// size of the bucket's rate limiter.
func (m *Monitor) SetBandwidthLimit(bucket string, limit int64) {
	m.tlock.Lock()
	defer m.tlock.Unlock()

	nodes := int64(m.NodeCount)
	if nodes < 1 {
		nodes = 1 // a Monitor built without NodeCount must not divide by zero
	}
	bw := limit / nodes
	if bw < 1 {
		bw = 1 // keep rate and burst positive when limit is below the node count
	}

	// Express the rate directly in bytes (tokens) per second. Deriving it via
	// rate.Every(time.Second / time.Duration(bw)) truncates the interval to
	// whole nanoseconds: that inflates the rate for every bw that does not
	// divide 1e9 evenly, gives an unlimited rate once bw exceeds 1e9, and
	// divides by zero when bw is 0.
	newlimit := rate.Limit(bw)

	t, ok := m.bucketThrottle[bucket]
	if !ok {
		m.bucketThrottle[bucket] = &throttle{
			Limiter:             rate.NewLimiter(newlimit, int(bw)),
			NodeBandwidthPerSec: bw,
		}
		return
	}

	t.NodeBandwidthPerSec = bw
	if t.Limiter == nil {
		t.Limiter = rate.NewLimiter(newlimit, int(bw))
		return
	}
	// Retune the existing limiter instead of swapping the pointer: readers
	// hold this *throttle and use its embedded limiter without taking tlock,
	// while SetLimit and SetBurst are safe for concurrent use.
	t.SetLimit(newlimit)
	t.SetBurst(int(bw))
}

View file

@ -18,7 +18,6 @@
package bandwidth
import (
"context"
"reflect"
"testing"
"time"
@ -30,52 +29,6 @@ const (
oneMiB uint64 = 1024 * 1024
)
func TestMonitor_GetThrottle(t *testing.T) {
type fields struct {
bucketThrottles map[string]*throttle
bucket string
bpi int64
}
t1 := newThrottle(context.Background(), 100, 1024*1024)
t2 := newThrottle(context.Background(), 200, 1024*1024)
tests := []struct {
name string
fields fields
want *throttle
}{
{
name: "Existing",
fields: fields{
bucketThrottles: map[string]*throttle{"bucket": t1},
bucket: "bucket",
bpi: 100,
},
want: t1,
},
{
name: "new",
fields: fields{
bucketThrottles: map[string]*throttle{"bucket": t1},
bucket: "bucket2",
bpi: 200,
},
want: t2,
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
m := &Monitor{
bucketThrottle: tt.fields.bucketThrottles,
}
if got := m.throttleBandwidth(context.Background(), tt.fields.bucket, tt.fields.bpi, 1024*1024); got.bytesPerInterval != tt.want.bytesPerInterval {
t.Errorf("throttleBandwidth() = %v, want %v", got, tt.want)
}
})
}
}
func TestMonitor_GetReport(t *testing.T) {
type fields struct {
activeBuckets map[string]*bucketMeasurement
@ -136,12 +89,12 @@ func TestMonitor_GetReport(t *testing.T) {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
thr := throttle{
bytesPerSecond: 1024 * 1024,
clusterBandwidth: 1024 * 1024,
NodeBandwidthPerSec: 1024 * 1024,
}
m := &Monitor{
activeBuckets: tt.fields.activeBuckets,
bucketThrottle: map[string]*throttle{"bucket": &thr},
NodeCount: 1,
}
m.activeBuckets["bucket"].updateExponentialMovingAverage(tt.fields.endTime)
got := m.GetReport(SelectBuckets())

View file

@ -20,61 +20,78 @@ package bandwidth
import (
"context"
"io"
"math"
)
// MonitoredReader monitors the bandwidth
// MonitoredReader represents a throttled reader subject to bandwidth monitoring
type MonitoredReader struct {
opts *MonitorReaderOptions
bucketMeasurement *bucketMeasurement // bucket measurement object
reader io.Reader // Reader to wrap
throttle *throttle // throttle the rate at which replication occur
monitor *Monitor // Monitor reference
lastErr error // last error reported, if this non-nil all reads will fail.
r io.Reader
throttle *throttle
ctx context.Context // request context
lastErr error // last error reported, if this non-nil all reads will fail.
m *Monitor
opts *MonitorReaderOptions
}
// MonitorReaderOptions provides configurable options for monitor reader implementation.
type MonitorReaderOptions struct {
Bucket string
Object string
HeaderSize int
BandwidthBytesPerSec int64
ClusterBandwidth int64
Bucket string
HeaderSize int
}
// NewMonitoredReader returns a io.Reader that reports bandwidth details.
func NewMonitoredReader(ctx context.Context, monitor *Monitor, reader io.Reader, opts *MonitorReaderOptions) *MonitoredReader {
return &MonitoredReader{
opts: opts,
bucketMeasurement: monitor.track(opts.Bucket, opts.Object),
reader: reader,
throttle: monitor.throttleBandwidth(ctx, opts.Bucket, opts.BandwidthBytesPerSec, opts.ClusterBandwidth),
monitor: monitor,
}
}
// Read wraps the read reader
func (m *MonitoredReader) Read(p []byte) (n int, err error) {
if m.lastErr != nil {
err = m.lastErr
// Read implements a throttled read
func (r *MonitoredReader) Read(buf []byte) (n int, err error) {
if r.lastErr != nil {
err = r.lastErr
return
}
p = p[:m.throttle.GetLimitForBytes(int64(len(p)))]
b := r.throttle.Burst() // maximum available tokens
need := len(buf) // number of bytes requested by caller
hdr := r.opts.HeaderSize // remaining header bytes
var tokens int // number of tokens to request
n, err = m.reader.Read(p)
if hdr > 0 { // available tokens go towards header first
if hdr < b { // all of header can be accommodated
r.opts.HeaderSize = 0
need = int(math.Min(float64(b-hdr), float64(need))) // use remaining tokens towards payload
tokens = need + hdr
} else { // part of header can be accommodated
r.opts.HeaderSize -= b - 1
need = 1 // to ensure we read at least one byte for every Read
tokens = b
}
} else { // all tokens go towards payload
need = int(math.Min(float64(b), float64(need)))
tokens = need
}
err = r.throttle.WaitN(r.ctx, tokens)
if err != nil {
m.lastErr = err
return
}
update := n + m.opts.HeaderSize
unused := len(p) - update
m.bucketMeasurement.incrementBytes(uint64(update))
m.opts.HeaderSize = 0 // Set to 0 post first read
if unused > 0 {
m.throttle.ReleaseUnusedBandwidth(int64(unused))
n, err = r.r.Read(buf[:need])
if err != nil {
r.lastErr = err
return
}
r.m.updateMeasurement(r.opts.Bucket, uint64(tokens))
return
}
// NewMonitoredReader wraps r in a MonitoredReader bound to the throttle that
// is currently configured on m for opts.Bucket, and calls m.track for that
// bucket. ctx is stored on the reader as the request context.
// NOTE(review): the throttle lookup yields nil for a bucket without a
// configured limit — confirm that Read copes with a nil throttle.
func NewMonitoredReader(ctx context.Context, m *Monitor, r io.Reader, opts *MonitorReaderOptions) *MonitoredReader {
	mr := &MonitoredReader{
		ctx:      ctx,
		m:        m,
		opts:     opts,
		r:        r,
		throttle: m.throttle(opts.Bucket),
	}
	m.track(opts.Bucket)
	return mr
}

View file

@ -1,121 +0,0 @@
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package bandwidth
import (
"context"
"sync"
"sync/atomic"
"time"
)
const (
throttleInternal = 250 * time.Millisecond
)
// throttle implements the throttling for bandwidth
type throttle struct {
generateTicker *time.Ticker // Ticker to generate available bandwidth
freeBytes int64 // unused bytes in the interval
bytesPerSecond int64 // max limit for bandwidth
bytesPerInterval int64 // bytes allocated for the interval
clusterBandwidth int64 // Cluster wide bandwidth needed for reporting
cond *sync.Cond // Used to notify waiting threads for bandwidth availability
goGenerate int64 // Flag to track if generate routine should be running. 0 == stopped
ctx context.Context // Context for generate
}
// newThrottle returns a new bandwidth throttle. Set bytesPerSecond to 0 for no limit
func newThrottle(ctx context.Context, bytesPerSecond int64, clusterBandwidth int64) *throttle {
if bytesPerSecond == 0 {
return &throttle{}
}
t := &throttle{
bytesPerSecond: bytesPerSecond,
generateTicker: time.NewTicker(throttleInternal),
clusterBandwidth: clusterBandwidth,
ctx: ctx,
}
t.cond = sync.NewCond(&sync.Mutex{})
t.SetBandwidth(bytesPerSecond, clusterBandwidth)
t.freeBytes = t.bytesPerInterval
return t
}
// GetLimitForBytes gets the bytes that are possible to send within the limit
// if want is <= 0 or no bandwidth limit set, returns want.
// Otherwise a value > 0 will always be returned.
func (t *throttle) GetLimitForBytes(want int64) int64 {
if want <= 0 || atomic.LoadInt64(&t.bytesPerInterval) == 0 {
return want
}
t.cond.L.Lock()
defer t.cond.L.Unlock()
for {
var send int64
freeBytes := atomic.LoadInt64(&t.freeBytes)
send = want
if freeBytes < want {
send = freeBytes
if send <= 0 {
t.cond.Wait()
continue
}
}
atomic.AddInt64(&t.freeBytes, -send)
// Bandwidth was consumed, start generate routine to allocate bandwidth
if atomic.CompareAndSwapInt64(&t.goGenerate, 0, 1) {
go t.generateBandwidth(t.ctx)
}
return send
}
}
// SetBandwidth sets a new bandwidth limit in bytes per second.
func (t *throttle) SetBandwidth(bandwidthBiPS int64, clusterBandwidth int64) {
bpi := int64(throttleInternal) * bandwidthBiPS / int64(time.Second)
atomic.StoreInt64(&t.bytesPerInterval, bpi)
}
// ReleaseUnusedBandwidth releases bandwidth that was allocated for a user
func (t *throttle) ReleaseUnusedBandwidth(bytes int64) {
atomic.AddInt64(&t.freeBytes, bytes)
}
// generateBandwidth periodically allocates new bandwidth to use
func (t *throttle) generateBandwidth(ctx context.Context) {
for {
select {
case <-t.generateTicker.C:
if atomic.LoadInt64(&t.freeBytes) == atomic.LoadInt64(&t.bytesPerInterval) {
// No bandwidth consumption stop the routine.
atomic.StoreInt64(&t.goGenerate, 0)
return
}
// A new window is available
t.cond.L.Lock()
atomic.StoreInt64(&t.freeBytes, atomic.LoadInt64(&t.bytesPerInterval))
t.cond.Broadcast()
t.cond.L.Unlock()
case <-ctx.Done():
return
}
}
}