Synchronize bucket cycle numbers (#13058)

Synchronize bucket cycles so it is much more
likely that the same prefixes will be picked up
for scanning.

Use the global bloom filter cycle for that. 
Bump bloom filter versions to clear those.
Author: Klaus Post
Date: 2021-08-25 17:25:26 +02:00 (committed by GitHub)
Parent: 200eb8dc0e
Commit: 88d719689c
9 changed files with 18 additions and 17 deletions
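
In essence: the scanner loop derives one cycle number from the global bloom filter cycle and pushes it down into every per-bucket scan, so all caches converge on the same NextCycle instead of each incrementing its own. A minimal runnable sketch of that idea, with hypothetical names (cacheInfo, scanBucket) standing in for MinIO's real types:

// Sketch only: names are illustrative, not MinIO's actual API.
package main

import "fmt"

type cacheInfo struct {
	Name      string
	NextCycle uint32
}

// scanBucket stands in for one bucket scan. Instead of the old
// cache.NextCycle++, the cache adopts the cycle the caller wants,
// keeping every bucket in step.
func scanBucket(c *cacheInfo, wantCycle uint32) {
	// ... scanning work would happen here ...
	c.NextCycle = wantCycle // synchronized, not incremented
}

func main() {
	// Caches that drifted apart under the old increment-per-scan scheme.
	caches := []*cacheInfo{{Name: "bucket-a", NextCycle: 3}, {Name: "bucket-b", NextCycle: 9}}
	wantCycle := uint32(12) // e.g. uint32(nextBloomCycle) from the scanner loop

	for _, c := range caches {
		scanBucket(c, wantCycle)
		fmt.Println(c.Name, c.NextCycle) // both now report 12
	}
}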


@@ -147,7 +147,7 @@ func runDataScanner(pctx context.Context, objAPI ObjectLayer) {
 			go storeDataUsageInBackend(ctx, objAPI, results)
 			bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
 			logger.LogIf(ctx, err)
-			err = objAPI.NSScanner(ctx, bf, results)
+			err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle))
 			logger.LogIf(ctx, err)
 			if err == nil {
 				// Store new cycle...
@@ -320,7 +320,7 @@ func scanDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
 		console.Debugf(logPrefix+"Finished scanner, %v entries (%+v) %s \n", len(s.newCache.Cache), *s.newCache.sizeRecursive(s.newCache.Info.Name), logSuffix)
 	}
 	s.newCache.Info.LastUpdate = UTCNow()
-	s.newCache.Info.NextCycle++
+	s.newCache.Info.NextCycle = cache.Info.NextCycle
 	return s.newCache, nil
 }
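
Note the inversion here: scanDataFolder used to advance NextCycle itself; it now carries over the cycle from the cache it was given, leaving cycle advancement to the caller (see the test changes below).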
@@ -929,9 +929,6 @@ func (i *scannerItem) applyLifecycle(ctx context.Context, o ObjectLayer, meta ac
 	case lifecycle.DeleteRestoredAction, lifecycle.DeleteRestoredVersionAction:
 	default:
 		// No action.
-		if i.debug {
-			console.Debugf(applyActionsLogPrefix+" object not expirable: %q\n", i.objectPath())
-		}
 		return false, size
 	}
@@ -1063,7 +1060,7 @@ func evalActionFromLifecycle(ctx context.Context, lc lifecycle.Lifecycle, obj Ob
 	return action
 }
 
-func applyTransitionRule(ctx context.Context, action lifecycle.Action, objLayer ObjectLayer, obj ObjectInfo) bool {
+func applyTransitionRule(obj ObjectInfo) bool {
 	if obj.DeleteMarker {
 		return false
 	}
@@ -1143,7 +1140,7 @@ func applyLifecycleAction(ctx context.Context, action lifecycle.Action, objLayer
 	case lifecycle.DeleteRestoredAction, lifecycle.DeleteRestoredVersionAction:
 		success = applyExpiryRule(ctx, objLayer, obj, true, action == lifecycle.DeleteRestoredVersionAction)
 	case lifecycle.TransitionAction, lifecycle.TransitionVersionAction:
-		success = applyTransitionRule(ctx, action, objLayer, obj)
+		success = applyTransitionRule(obj)
 	}
 	return
 }
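
Incidental cleanup riding along in this commit: applyTransitionRule evidently never used its ctx, action, and objLayer parameters, so the signature and its call site are trimmed together with the debug logging removed above.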


@@ -46,7 +46,7 @@ const (
 	dataUpdateTrackerQueueSize    = 0
 	dataUpdateTrackerFilename     = dataUsageBucket + SlashSeparator + ".tracker.bin"
-	dataUpdateTrackerVersion      = 6
+	dataUpdateTrackerVersion      = 7
 	dataUpdateTrackerSaveInterval = 5 * time.Minute
 )
@@ -397,7 +397,7 @@ func (d *dataUpdateTracker) deserialize(src io.Reader, newerThan time.Time) erro
 		return err
 	}
 	switch tmp[0] {
-	case 1, 2, 3, 4, 5:
+	case 1, 2, 3, 4, 5, 6:
 		if intDataUpdateTracker.debug {
 			console.Debugln(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.")
 		}
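
Bumping dataUpdateTrackerVersion from 6 to 7 and marking version 6 as deprecated makes upgraded servers discard their persisted tracker state on load; that is the "bump bloom filter versions to clear those" part of the commit message, so every node restarts from a clean, synchronized cycle.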


@@ -179,6 +179,7 @@ func TestDataUsageUpdate(t *testing.T) {
 	// Changed dir must be picked up in this many cycles.
 	for i := 0; i < dataUsageUpdateDirCycles; i++ {
 		got, err = scanDataFolder(context.Background(), base, got, getSize)
+		got.Info.NextCycle++
 		if err != nil {
 			t.Fatal(err)
 		}
@@ -423,6 +424,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
 	// Changed dir must be picked up in this many cycles.
 	for i := 0; i < dataUsageUpdateDirCycles; i++ {
 		got, err = scanDataFolder(context.Background(), base, got, getSize)
+		got.Info.NextCycle++
 		if err != nil {
 			t.Fatal(err)
 		}
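
Because scanDataFolder no longer increments the cycle itself, both tests now advance got.Info.NextCycle manually between iterations to keep simulating successive scanner cycles.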


@@ -456,7 +456,7 @@ func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []er
 	return storageInfo, errs
 }
 
-func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error {
+func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error {
 	// Updates must be closed before we return.
 	defer close(updates)
@@ -501,7 +501,7 @@ func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, upd
 			}
 		}()
 		// Start scanner. Blocks until done.
-		err := erObj.nsScanner(ctx, allBuckets, bf, updates)
+		err := erObj.nsScanner(ctx, allBuckets, bf, wantCycle, updates)
 		if err != nil {
 			logger.LogIf(ctx, err)
 			mu.Lock()


@@ -326,7 +326,7 @@ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) {
 // nsScanner will start scanning buckets and send updated totals as they are traversed.
 // Updates are sent on a regular basis and the caller *must* consume them.
-func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error {
+func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, wantCycle uint32, updates chan<- dataUsageCache) error {
 	if len(buckets) == 0 {
 		return nil
 	}
@@ -419,7 +419,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf
 		case v, ok := <-bucketResults:
 			if !ok {
 				// Save final state...
-				cache.Info.NextCycle++
+				cache.Info.NextCycle = wantCycle
 				cache.Info.LastUpdate = time.Now()
 				logger.LogIf(ctx, cache.save(ctx, er, dataUsageCacheName))
 				updates <- cache
@@ -461,12 +461,13 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf
 		cache.Info.BloomFilter = bloom
 		cache.Info.SkipHealing = healing
 		cache.Disks = allDiskIDs
+		cache.Info.NextCycle = wantCycle
 		if cache.Info.Name != bucket.Name {
 			logger.LogIf(ctx, fmt.Errorf("cache name mismatch: %s != %s", cache.Info.Name, bucket.Name))
 			cache.Info = dataUsageCacheInfo{
 				Name:       bucket.Name,
 				LastUpdate: time.Time{},
-				NextCycle:  0,
+				NextCycle:  wantCycle,
 			}
 		}
 		// Collect updates.
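
Two details here: each bucket cache is stamped with wantCycle up front, and a cache that is rebuilt after a name mismatch now starts at wantCycle instead of 0, so even a freshly reset cache stays aligned with the global cycle.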


@@ -240,7 +240,7 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) {
 }
 
 // NSScanner returns data usage stats of the current FS deployment
-func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error {
+func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error {
 	defer close(updates)
 	// Load bucket totals
 	var totalCache dataUsageCache


@@ -48,7 +48,7 @@ func (a GatewayUnsupported) LocalStorageInfo(ctx context.Context) (StorageInfo,
 }
 
 // NSScanner - scanner is not implemented for gateway
-func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error {
+func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error {
 	logger.CriticalIf(ctx, errors.New("not implemented"))
 	return NotImplemented{}
 }


@@ -103,7 +103,7 @@ type ObjectLayer interface {
 	// Storage operations.
 	Shutdown(context.Context) error
-	NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error
+	NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error
 	BackendInfo() madmin.BackendInfo
 	StorageInfo(ctx context.Context) (StorageInfo, []error)
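
Since NSScanner is part of the ObjectLayer interface, every backend above (erasure pools, FS, gateway) must absorb the extra parameter in lockstep. A self-contained sketch of a conforming stub, with placeholder types standing in for *bloomFilter and madmin.DataUsageInfo:

// Illustrative stub, not MinIO code. Placeholder types stand in for
// *bloomFilter and madmin.DataUsageInfo from the real interface.
package main

import (
	"context"
	"fmt"
)

type bloomFilter struct{}   // placeholder
type dataUsageInfo struct{} // placeholder for madmin.DataUsageInfo

type objectLayer interface {
	NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- dataUsageInfo, wantCycle uint32) error
}

type noopLayer struct{}

// NSScanner now receives the cycle the caller wants instead of tracking
// its own; a backend that cannot scan just closes updates and returns.
func (noopLayer) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- dataUsageInfo, wantCycle uint32) error {
	defer close(updates)
	fmt.Println("scan requested for cycle", wantCycle)
	return nil
}

func main() {
	updates := make(chan dataUsageInfo)
	done := make(chan struct{})
	go func() {
		for range updates { // the caller *must* consume updates
		}
		close(done)
	}()
	var ol objectLayer = noopLayer{}
	_ = ol.NSScanner(context.Background(), nil, updates, 42)
	<-done
}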


@@ -473,6 +473,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates
 			}
 			return sizeSummary{}, errSkipFile
 		}
+		defer metaDataPoolPut(buf)
 		// Remove filename which is the meta file.
 		item.transformMetaDir()
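
Finally, the lone xl-storage change returns the metadata buffer to its pool once the item has been handled; without the defer, the buffer fetched earlier was only reclaimed by GC and never reused. A generic sketch of the get/defer-put pattern, assuming metaDataPoolPut wraps a sync.Pool (an assumption based on its name, not confirmed by this diff):

// Generic sketch of the pattern; metaDataPoolPut in MinIO is presumably
// a wrapper over a sync.Pool along these lines.
package main

import (
	"fmt"
	"sync"
)

var metaDataPool = sync.Pool{
	New: func() interface{} { return make([]byte, 0, 16<<10) },
}

// readMeta hands out a pooled buffer; the caller owns returning it.
func readMeta(path string) []byte {
	buf := metaDataPool.Get().([]byte)
	// ... read object metadata into buf here ...
	return append(buf, path...) // stand-in for real metadata
}

func processItem(path string) {
	buf := readMeta(path)
	// The fix: guarantee the buffer is returned on every exit path,
	// not just the early-error ones.
	defer metaDataPool.Put(buf[:0])

	fmt.Println("processing", path, "meta bytes:", len(buf))
}

func main() {
	processItem("bucket/object/xl.meta")
}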