[feat] use rename instead of recursive deletes (#11641)

most of the delete calls today spend time in
a blocking operation where multiple calls need
to be recursively sent to delete the objects,
instead we can use rename operation to atomically
move the objects from the namespace to `tmp/.trash`

we can schedule deletion of objects at this
location once in 15, 30mins and we can also add
wait times between each delete operation.

this allows us to make delete's faster as well
less chattier on the drives, each server runs locally
a groutine which would clean this up regularly.
This commit is contained in:
Harshavardhana 2021-02-26 09:52:27 -08:00 committed by GitHub
parent 1f659204a2
commit 6386b45c08
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 179 additions and 87 deletions

View file

@ -20,6 +20,7 @@ import (
"context"
"fmt"
"io"
"os"
"path"
"sort"
"strconv"
@ -138,39 +139,39 @@ func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) {
// Remove the old multipart uploads on the given disk.
func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk StorageAPI, expiry time.Duration) {
now := time.Now()
shaDirs, err := disk.ListDir(ctx, minioMetaMultipartBucket, "", -1)
if err != nil {
return
}
for _, shaDir := range shaDirs {
uploadIDDirs, err := disk.ListDir(ctx, minioMetaMultipartBucket, shaDir, -1)
if err != nil {
continue
}
for _, uploadIDDir := range uploadIDDirs {
diskPath := disk.Endpoint().Path
readDirFn(pathJoin(diskPath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error {
return readDirFn(pathJoin(diskPath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error {
uploadIDPath := pathJoin(shaDir, uploadIDDir)
fi, err := disk.ReadVersion(ctx, minioMetaMultipartBucket, uploadIDPath, "", false)
if err != nil {
continue
return nil
}
wait := er.deletedCleanupSleeper.Timer(ctx)
if now.Sub(fi.ModTime) > expiry {
er.renameAll(ctx, minioMetaMultipartBucket, uploadIDPath)
}
wait()
return nil
})
})
readDirFn(pathJoin(diskPath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error {
if tmpDir == ".trash/" { // do not remove .trash/ here, it has its own routines
return nil
}
}
tmpDirs, err := disk.ListDir(ctx, minioMetaTmpBucket, "", -1)
if err != nil {
return
}
for _, tmpDir := range tmpDirs {
vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir))
if err != nil {
continue
return nil
}
wait := er.deletedCleanupSleeper.Timer(ctx)
if now.Sub(vi.Created) > expiry {
er.deleteAll(ctx, minioMetaTmpBucket, tmpDir)
}
}
wait()
return nil
})
}
// ListMultipartUploads - lists all the pending multipart

View file

@ -403,21 +403,28 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
// Initialize erasure objects for a given set.
s.sets[i] = &erasureObjects{
setNumber: i,
setDriveCount: setDriveCount,
defaultParityCount: defaultParityCount,
getDisks: s.GetDisks(i),
getLockers: s.GetLockers(i),
getEndpoints: s.GetEndpoints(i),
nsMutex: mutex,
bp: bp,
mrfOpCh: make(chan partialOperation, 10000),
setNumber: i,
setDriveCount: setDriveCount,
defaultParityCount: defaultParityCount,
getDisks: s.GetDisks(i),
getLockers: s.GetLockers(i),
getEndpoints: s.GetEndpoints(i),
deletedCleanupSleeper: newDynamicSleeper(10, 10*time.Second),
nsMutex: mutex,
bp: bp,
mrfOpCh: make(chan partialOperation, 10000),
}
}
// cleanup ".trash/" folder every 30 minutes with sufficient sleep cycles.
const deletedObjectsCleanupInterval = 30 * time.Minute
// start cleanup stale uploads go-routine.
go s.cleanupStaleUploads(ctx, GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry)
// start cleanup of deleted objects.
go s.cleanupDeletedObjects(ctx, deletedObjectsCleanupInterval)
// Start the disk monitoring and connect routine.
go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval)
go s.maintainMRFList()
@ -426,6 +433,25 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
return s, nil
}
func (s *erasureSets) cleanupDeletedObjects(ctx context.Context, cleanupInterval time.Duration) {
timer := time.NewTimer(cleanupInterval)
defer timer.Stop()
for {
select {
case <-ctx.Done():
return
case <-timer.C:
// Reset for the next interval
timer.Reset(cleanupInterval)
for _, set := range s.sets {
set.cleanupDeletedObjects(ctx)
}
}
}
}
func (s *erasureSets) cleanupStaleUploads(ctx context.Context, cleanupInterval, expiry time.Duration) {
timer := time.NewTimer(cleanupInterval)
defer timer.Stop()

View file

@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"math/rand"
"os"
"sort"
"sync"
"time"
@ -71,6 +72,8 @@ type erasureObjects struct {
bp *bpool.BytePoolCap
mrfOpCh chan partialOperation
deletedCleanupSleeper *dynamicSleeper
}
// NewNSLock - initialize a new namespace RWLocker instance.
@ -273,6 +276,28 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea
return newDisks, healing
}
// Clean-up previously deleted objects. from .minio.sys/tmp/.trash/
func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) {
// run multiple cleanup's local to this server.
var wg sync.WaitGroup
for _, disk := range er.getLoadBalancedLocalDisks() {
if disk != nil {
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
diskPath := disk.Endpoint().Path
readDirFn(pathJoin(diskPath, minioMetaTmpDeletedBucket), func(ddir string, typ os.FileMode) error {
wait := er.deletedCleanupSleeper.Timer(ctx)
removeAll(pathJoin(diskPath, minioMetaTmpDeletedBucket, ddir))
wait()
return nil
})
}(disk)
}
}
wg.Wait()
}
// CrawlAndGetDataUsage will start crawling buckets and send updated totals as they are traversed.
// Updates are sent on a regular basis and the caller *must* consume them.
func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error {

View file

@ -22,6 +22,7 @@ import (
"os"
pathutil "path"
"runtime"
"strings"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/lock"
@ -392,6 +393,55 @@ func fsRenameFile(ctx context.Context, sourcePath, destPath string) error {
return nil
}
func deleteFile(basePath, deletePath string, recursive bool) error {
if basePath == "" || deletePath == "" {
return nil
}
isObjectDir := HasSuffix(deletePath, SlashSeparator)
basePath = pathutil.Clean(basePath)
deletePath = pathutil.Clean(deletePath)
if !strings.HasPrefix(deletePath, basePath) || deletePath == basePath {
return nil
}
var err error
if recursive {
os.RemoveAll(deletePath)
} else {
err = os.Remove(deletePath)
}
if err != nil {
switch {
case isSysErrNotEmpty(err):
// if object is a directory, but if its not empty
// return FileNotFound to indicate its an empty prefix.
if isObjectDir {
return errFileNotFound
}
// Ignore errors if the directory is not empty. The server relies on
// this functionality, and sometimes uses recursion that should not
// error on parent directories.
return nil
case osIsNotExist(err):
return errFileNotFound
case osIsPermission(err):
return errFileAccessDenied
case isSysErrIO(err):
return errFaultyDisk
default:
return err
}
}
deletePath = pathutil.Dir(deletePath)
// Delete parent directory obviously not recursively. Errors for
// parent directories shouldn't trickle down.
deleteFile(basePath, deletePath, false)
return nil
}
// fsDeleteFile is a wrapper for deleteFile(), after checking the path length.
func fsDeleteFile(ctx context.Context, basePath, deletePath string) error {
if err := checkPathLength(basePath); err != nil {

View file

@ -87,8 +87,9 @@ const (
// GlobalStaleUploadsExpiry - Expiry duration after which the uploads in multipart, tmp directory are deemed stale.
GlobalStaleUploadsExpiry = time.Hour * 24 // 24 hrs.
// GlobalStaleUploadsCleanupInterval - Cleanup interval when the stale uploads cleanup is initiated.
GlobalStaleUploadsCleanupInterval = time.Hour * 24 // 24 hrs.
GlobalStaleUploadsCleanupInterval = time.Hour * 12 // 12 hrs.
// GlobalServiceExecutionInterval - Executes the Lifecycle events.
GlobalServiceExecutionInterval = time.Hour * 24 // 24 hrs.
@ -96,7 +97,7 @@ const (
// Refresh interval to update in-memory iam config cache.
globalRefreshIAMInterval = 5 * time.Minute
// Limit of location constraint XML for unauthenticted PUT bucket operations.
// Limit of location constraint XML for unauthenticated PUT bucket operations.
maxLocationConstraintSize = 3 * humanize.MiByte
// Maximum size of default bucket encryption configuration allowed

View file

@ -23,6 +23,7 @@ import (
"io"
"os"
"path"
pathutil "path"
"strings"
"sync"
"time"
@ -35,7 +36,7 @@ func renameAllBucketMetacache(epPath string) error {
// to `.minio.sys/tmp/` for deletion.
return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error {
if typ == os.ModeDir {
tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID())
if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)),
tmpMetacacheOld); err != nil && err != errFileNotFound {
return fmt.Errorf("unable to rename (%s -> %s) %w",

View file

@ -340,8 +340,19 @@ func testObjectAPIPutObjectStaleFiles(obj ObjectLayer, instanceType string, disk
for _, disk := range disks {
tmpMetaDir := path.Join(disk, minioMetaTmpBucket)
if !isDirEmpty(tmpMetaDir) {
t.Fatalf("%s: expected: empty, got: non-empty", minioMetaTmpBucket)
files, err := ioutil.ReadDir(tmpMetaDir)
if err != nil {
t.Fatal(err)
}
var found bool
for _, fi := range files {
if fi.Name() == ".trash" {
continue
}
found = true
}
if found {
t.Fatalf("%s: expected: empty, got: non-empty %#v", minioMetaTmpBucket, files)
}
}
}
@ -418,8 +429,17 @@ func testObjectAPIMultipartPutObjectStaleFiles(obj ObjectLayer, instanceType str
t.Errorf("%s", err)
}
if len(files) != 0 {
t.Fatalf("%s: expected: empty, got: non-empty. content: %s", tmpMetaDir, files)
var found bool
for _, fi := range files {
if fi.Name() == ".trash" {
continue
}
found = true
break
}
if found {
t.Fatalf("%s: expected: empty, got: non-empty. content: %#v", tmpMetaDir, files)
}
}
}

View file

@ -58,8 +58,11 @@ const (
mpartMetaPrefix = "multipart"
// MinIO Multipart meta prefix.
minioMetaMultipartBucket = minioMetaBucket + SlashSeparator + mpartMetaPrefix
// MinIO Tmp meta prefix.
// MinIO tmp meta prefix.
minioMetaTmpBucket = minioMetaBucket + "/tmp"
// MinIO tmp meta prefix for deleted objects.
minioMetaTmpDeletedBucket = minioMetaTmpBucket + "/.trash"
// DNS separator (period), used for bucket name validation.
dnsDelimiter = "."
// On compressed files bigger than this;

View file

@ -940,7 +940,7 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F
if !isXL2V1Format(buf) {
// Delete the meta file, if there are no more versions the
// top level parent is automatically removed.
return deleteFile(volumeDir, pathJoin(volumeDir, path), true)
return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true)
}
var xlMeta xlMetaV2
@ -967,7 +967,8 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F
return err
}
if err = removeAll(filePath); err != nil {
tmpuuid := mustGetUUID()
if err = renameAll(filePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)); err != nil {
return err
}
}
@ -985,7 +986,7 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F
return err
}
return deleteFile(volumeDir, filePath, false)
return s.deleteFile(volumeDir, filePath, false)
}
// WriteMetadata - writes FileInfo metadata for path at `xl.meta`
@ -1828,7 +1829,7 @@ func (s *xlStorage) CheckFile(ctx context.Context, volume string, path string) e
// move up the tree, deleting empty parent directories until it finds one
// with files in it. Returns nil for a non-empty directory even when
// recursive is set to false.
func deleteFile(basePath, deletePath string, recursive bool) error {
func (s *xlStorage) deleteFile(basePath, deletePath string, recursive bool) error {
if basePath == "" || deletePath == "" {
return nil
}
@ -1841,7 +1842,8 @@ func deleteFile(basePath, deletePath string, recursive bool) error {
var err error
if recursive {
err = os.RemoveAll(deletePath)
tmpuuid := mustGetUUID()
err = renameAll(deletePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid))
} else {
err = os.Remove(deletePath)
}
@ -1872,7 +1874,7 @@ func deleteFile(basePath, deletePath string, recursive bool) error {
// Delete parent directory obviously not recursively. Errors for
// parent directories shouldn't trickle down.
deleteFile(basePath, deletePath, false)
s.deleteFile(basePath, deletePath, false)
return nil
}
@ -1910,46 +1912,7 @@ func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recu
}
// Delete file and delete parent directory as well if it's empty.
return deleteFile(volumeDir, filePath, recursive)
}
func (s *xlStorage) DeleteFileBulk(volume string, paths []string) (errs []error, err error) {
atomic.AddInt32(&s.activeIOCount, 1)
defer func() {
atomic.AddInt32(&s.activeIOCount, -1)
}()
volumeDir, err := s.getVolDir(volume)
if err != nil {
return nil, err
}
// Stat a volume entry.
_, err = os.Lstat(volumeDir)
if err != nil {
if osIsNotExist(err) {
return nil, errVolumeNotFound
} else if osIsPermission(err) {
return nil, errVolumeAccessDenied
} else if isSysErrIO(err) {
return nil, errFaultyDisk
}
return nil, err
}
errs = make([]error, len(paths))
// Following code is needed so that we retain SlashSeparator
// suffix if any in path argument.
for idx, path := range paths {
filePath := pathJoin(volumeDir, path)
errs[idx] = checkPathLength(filePath)
if errs[idx] != nil {
continue
}
// Delete file and delete parent directory as well if its empty.
errs[idx] = deleteFile(volumeDir, filePath, false)
}
return
return s.deleteFile(volumeDir, filePath, recursive)
}
// RenameData - rename source path to destination path atomically, metadata and data directory.
@ -2180,8 +2143,10 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir,
// Commit data
if srcDataPath != "" {
removeAll(oldDstDataPath)
removeAll(dstDataPath)
tmpuuid := mustGetUUID()
renameAll(oldDstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid))
tmpuuid = mustGetUUID()
renameAll(dstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid))
if err = renameAll(srcDataPath, dstDataPath); err != nil {
return osErrToFileErr(err)
}
@ -2194,12 +2159,12 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir,
// Remove parent dir of the source file if empty
if parentDir := pathutil.Dir(srcFilePath); isDirEmpty(parentDir) {
deleteFile(srcVolumeDir, parentDir, false)
s.deleteFile(srcVolumeDir, parentDir, false)
}
if srcDataPath != "" {
if parentDir := pathutil.Dir(srcDataPath); isDirEmpty(parentDir) {
deleteFile(srcVolumeDir, parentDir, false)
s.deleteFile(srcVolumeDir, parentDir, false)
}
}
@ -2286,7 +2251,7 @@ func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolum
// Remove parent dir of the source file if empty
if parentDir := pathutil.Dir(srcFilePath); isDirEmpty(parentDir) {
deleteFile(srcVolumeDir, parentDir, false)
s.deleteFile(srcVolumeDir, parentDir, false)
}
return nil