minio/cmd/object-api-common.go
Harshavardhana fb96779a8a Add large bucket support for erasure coded backend (#5160)
This PR implements an object layer which
combines input erasure sets of XL layers
into a unified namespace.

This object layer extends the existing
erasure coded implementation. The design
assumes that providing > 16 disks is a
static configuration as well, i.e. if you start
the setup with 32 disks as 4 sets of 8 disks
per pack, then you must always provide 4 sets.
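
To make that arithmetic concrete, here is a minimal sketch, under the
assumption that a deployment's disk count must divide evenly into
equal-sized sets; splitIntoSets is a hypothetical name for
illustration, not code from this PR:

    package main

    import "fmt"

    // splitIntoSets is a hypothetical helper: the total disk count must
    // divide evenly into sets of a fixed size, and that layout is then
    // fixed for the life of the deployment.
    func splitIntoSets(totalDisks, disksPerSet int) (int, error) {
        if disksPerSet <= 0 || totalDisks%disksPerSet != 0 {
            return 0, fmt.Errorf("%d disks do not divide evenly into sets of %d", totalDisks, disksPerSet)
        }
        return totalDisks / disksPerSet, nil
    }

    func main() {
        numSets, _ := splitIntoSets(32, 8)
        fmt.Println(numSets) // prints 4 - the set count you must always provide
    }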

Some design details and restrictions:

- Objects are distributed using consistent ordering
  to a unique erasure coded layer (see the sketch
  after this list).
- Each pack has its own dsync, so locks are
  synchronized properly at the pack (erasure layer) level.
- Each pack is still limited to a maximum of 16
  disks; you can start with multiple such sets
  statically.
- Sets are a static collection of disks and cannot
  be changed; there is no elastic expansion or
  removal allowed.
- ListObjects() across sets can be noticeably
  slower, since List happens on all servers and
  the results are merged at this sets layer.
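
As the first bullet notes, objects map to a set by a deterministic
function of the object name. The sketch below is illustrative only:
the crc32 hash choice and the function name are assumptions, not
necessarily what this PR implements.

    package main

    import (
        "fmt"
        "hash/crc32"
    )

    // setIndex returns a deterministic set number for an object name,
    // so the same object always lands on the same erasure set. The
    // crc32 hash here is an assumption chosen for illustration.
    func setIndex(object string, numSets int) int {
        return int(crc32.ChecksumIEEE([]byte(object)) % uint32(numSets))
    }

    func main() {
        // The same key always maps to the same one of 4 sets.
        fmt.Println(setIndex("photos/2018/img.jpg", 4))
        fmt.Println(setIndex("photos/2018/img.jpg", 4)) // identical output
    }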

Fixes #5465
Fixes #5464
Fixes #5461
Fixes #5460
Fixes #5459
Fixes #5458
Fixes #5488
Fixes #5489
Fixes #5497
Fixes #5496
2018-02-15 17:45:57 -08:00


/*
* Minio Cloud Storage, (C) 2016, 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd

import (
	"path"
	"sync"

	humanize "github.com/dustin/go-humanize"

	"github.com/minio/minio/pkg/errors"
)

const (
	// Block size used for all internal operations version 1.
	blockSizeV1 = 10 * humanize.MiByte

	// Staging buffer read size for all internal operations version 1.
	readSizeV1 = 1 * humanize.MiByte

	// Buckets meta prefix.
	bucketMetaPrefix = "buckets"

	// ETag (hex encoded md5sum) of empty string.
	emptyETag = "d41d8cd98f00b204e9800998ecf8427e"
)

// Global object layer mutex, used for safely updating object layer.
var globalObjLayerMutex *sync.RWMutex

// Global object layer, only accessed by newObjectLayerFn().
var globalObjectAPI ObjectLayer

func init() {
	// Initialize this once per server initialization.
	globalObjLayerMutex = &sync.RWMutex{}
}

// Check if the disk is remote.
func isRemoteDisk(disk StorageAPI) bool {
	_, ok := disk.(*networkStorage)
	return ok
}
// Checks if the object is a directory: returns true when
// size == 0 and the object name ends with slashSeparator.
func isObjectDir(object string, size int64) bool {
	return hasSuffix(object, slashSeparator) && size == 0
}
// Converts bucket and object metadata into an ObjectInfo datatype.
func dirObjectInfo(bucket, object string, size int64, metadata map[string]string) ObjectInfo {
	// This is a special case with size as '0' and object ends with
	// a slash separator, we treat it like a valid operation and
	// return success.
	etag := metadata["etag"]
	delete(metadata, "etag")
	if etag == "" {
		etag = emptyETag
	}

	return ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ModTime:     UTCNow(),
		ContentType: "application/octet-stream",
		IsDir:       true,
		Size:        size,
		ETag:        etag,
		UserDefined: metadata,
	}
}
func deleteBucketMetadata(bucket string, objAPI ObjectLayer) {
	// Delete bucket access policy, if present - ignore any errors.
	_ = removeBucketPolicy(bucket, objAPI)

	// Notify all peers (including self) to update in-memory state
	S3PeersUpdateBucketPolicy(bucket)

	// Delete notification config, if present - ignore any errors.
	_ = removeNotificationConfig(bucket, objAPI)

	// Notify all peers (including self) to update in-memory state
	S3PeersUpdateBucketNotification(bucket, nil)

	// Delete listener config, if present - ignore any errors.
	_ = removeListenerConfig(bucket, objAPI)

	// Notify all peers (including self) to update in-memory state
	S3PeersUpdateBucketListener(bucket, []listenerConfig{})
}
// House keeping code for FS/XL and distributed Minio setup.
func houseKeeping(storageDisks []StorageAPI) error {
	var wg = &sync.WaitGroup{}

	// Initialize errs to collect errors inside go-routine.
	var errs = make([]error, len(storageDisks))

	// Initialize all disks in parallel.
	for index, disk := range storageDisks {
		if disk == nil {
			continue
		}

		// Skip remote disks.
		if isRemoteDisk(disk) {
			continue
		}

		wg.Add(1)
		go func(index int, disk StorageAPI) {
			// Indicate this wait group is done.
			defer wg.Done()

			// Cleanup all temp entries upon start.
			err := cleanupDir(disk, minioMetaTmpBucket, "")
			if err != nil {
				if !errors.IsErrIgnored(errors.Cause(err), errDiskNotFound, errVolumeNotFound, errFileNotFound) {
					errs[index] = err
				}
			}
		}(index, disk)
	}

	// Wait for all cleanup to finish.
	wg.Wait()

	// Return upon first error.
	for _, err := range errs {
		if err == nil {
			continue
		}
		return toObjectErr(err, minioMetaTmpBucket, "*")
	}

	// Return success here.
	return nil
}
// Depending on the disk type, network or local, initialize storage API.
func newStorageAPI(endpoint Endpoint) (storage StorageAPI, err error) {
	if endpoint.IsLocal {
		return newPosix(endpoint.Path)
	}
	return newStorageRPC(endpoint), nil
}
// Cleanup a directory recursively.
func cleanupDir(storage StorageAPI, volume, dirPath string) error {
	var delFunc func(string) error
	// Function to delete entries recursively.
	delFunc = func(entryPath string) error {
		if !hasSuffix(entryPath, slashSeparator) {
			// Delete the file entry.
			return errors.Trace(storage.DeleteFile(volume, entryPath))
		}

		// If it's a directory, list and call delFunc() for each entry.
		entries, err := storage.ListDir(volume, entryPath)
		// If entryPath prefix never existed, safe to ignore.
		if err == errFileNotFound {
			return nil
		} else if err != nil { // For any other errors fail.
			return errors.Trace(err)
		} // else on success..

		// Entry path is empty, just delete it.
		if len(entries) == 0 {
			return errors.Trace(storage.DeleteFile(volume, path.Clean(entryPath)))
		}

		// Recurse and delete all other entries.
		for _, entry := range entries {
			if err = delFunc(pathJoin(entryPath, entry)); err != nil {
				return err
			}
		}
		return nil
	}
	err := delFunc(retainSlash(pathJoin(dirPath)))
	return err
}