heal: Add healing support for bucket, bucket metadata files. (#3252)

This patch implements generic healing support, but for now it is
only used as part of quickHeal().

Fixes #3237
Harshavardhana 2016-11-16 16:42:23 -08:00 committed by GitHub
parent df8153859c
commit c91d3791f9
18 changed files with 700 additions and 217 deletions
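
In rough terms, the startup flow this patch introduces is: list every bucket
visible on any disk, recreate buckets where they are missing, then heal the
bucket metadata objects. Below is a minimal sketch of that flow written against
the package-level helpers added in this patch (listBucketNames, healBucket,
healBucketMetadata); the function name exampleQuickHeal is illustrative only.

func exampleQuickHeal(storageDisks []StorageAPI, writeQuorum int) error {
    // Gather bucket names from every reachable disk.
    bucketNames, err := listBucketNames(storageDisks)
    if err != nil {
        return err
    }
    for bucketName := range bucketNames {
        // Recreate the bucket on disks where it is missing.
        if err = healBucket(storageDisks, bucketName, writeQuorum); err != nil {
            return err
        }
        // Then heal policy.json, notification.xml and listeners.json.
        if err = healBucketMetadata(storageDisks, bucketName); err != nil {
            return err
        }
    }
    return nil
}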

@ -28,7 +28,7 @@ import "reflect"
// Tests getReadDisks which returns readable disks slice from which we can
// read parallelly.
func testGetReadDisks(t *testing.T, xl xlObjects) {
func testGetReadDisks(t *testing.T, xl *xlObjects) {
d := xl.storageDisks
testCases := []struct {
index int // index argument for getReadDisks
@ -121,7 +121,7 @@ func testGetReadDisks(t *testing.T, xl xlObjects) {
// Test getOrderedDisks which returns ordered slice of disks from their
// actual distribution.
func testGetOrderedDisks(t *testing.T, xl xlObjects) {
func testGetOrderedDisks(t *testing.T, xl *xlObjects) {
disks := xl.storageDisks
distribution := []int{16, 14, 12, 10, 8, 6, 4, 2, 1, 3, 5, 7, 9, 11, 13, 15}
orderedDisks := getOrderedDisks(distribution, disks)
@ -232,7 +232,7 @@ func TestErasureReadUtils(t *testing.T) {
t.Fatal(err)
}
defer removeRoots(disks)
xl := objLayer.(xlObjects)
xl := objLayer.(*xlObjects)
testGetReadDisks(t, xl)
testGetOrderedDisks(t, xl)
}

@ -304,13 +304,11 @@ func loadNotificationConfig(bucket string, objAPI ObjectLayer) (*notificationCon
// Construct the notification config path.
notificationConfigPath := path.Join(bucketConfigPrefix, bucket, bucketNotificationConfig)
objInfo, err := objAPI.GetObjectInfo(minioMetaBucket, notificationConfigPath)
err = errorCause(err)
if err != nil {
// 'notification.xml' not found return
// 'errNoSuchNotifications'. This is default when no
// bucket notifications are found on the bucket.
switch err.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
return nil, errNoSuchNotifications
}
errorIf(err, "Unable to load bucket-notification for bucket %s", bucket)
@ -319,13 +317,11 @@ func loadNotificationConfig(bucket string, objAPI ObjectLayer) (*notificationCon
}
var buffer bytes.Buffer
err = objAPI.GetObject(minioMetaBucket, notificationConfigPath, 0, objInfo.Size, &buffer)
err = errorCause(err)
if err != nil {
// 'notification.xml' not found return
// 'errNoSuchNotifications'. This is default when no
// bucket notifications are found on the bucket.
switch err.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
return nil, errNoSuchNotifications
}
errorIf(err, "Unable to load bucket-notification for bucket %s", bucket)
@ -357,13 +353,11 @@ func loadListenerConfig(bucket string, objAPI ObjectLayer) ([]listenerConfig, er
// Construct the notification config path.
listenerConfigPath := path.Join(bucketConfigPrefix, bucket, bucketListenerConfig)
objInfo, err := objAPI.GetObjectInfo(minioMetaBucket, listenerConfigPath)
err = errorCause(err)
if err != nil {
// 'listener.json' not found return
// 'errNoSuchNotifications'. This is default when no
// bucket notifications are found on the bucket.
switch err.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
return nil, errNoSuchNotifications
}
errorIf(err, "Unable to load bucket-listeners for bucket %s", bucket)
@ -372,13 +366,11 @@ func loadListenerConfig(bucket string, objAPI ObjectLayer) ([]listenerConfig, er
}
var buffer bytes.Buffer
err = objAPI.GetObject(minioMetaBucket, listenerConfigPath, 0, objInfo.Size, &buffer)
err = errorCause(err)
if err != nil {
// 'notification.xml' not found return
// 'errNoSuchNotifications'. This is default when no
// bucket listeners are found on the bucket.
switch err.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
return nil, errNoSuchNotifications
}
errorIf(err, "Unable to load bucket-listeners for bucket %s", bucket)

@ -223,7 +223,7 @@ func genFormatXLInvalidDisksOrder() []*formatConfigV1 {
func prepareFormatXLHealFreshDisks(obj ObjectLayer) ([]StorageAPI, error) {
var err error
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
err = obj.MakeBucket("bucket")
if err != nil {
@ -354,7 +354,7 @@ func TestFormatXLHealCorruptedDisks(t *testing.T) {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
err = obj.MakeBucket("bucket")
if err != nil {
@ -427,7 +427,7 @@ func TestFormatXLReorderByInspection(t *testing.T) {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
err = obj.MakeBucket("bucket")
if err != nil {
@ -609,7 +609,7 @@ func TestInitFormatXLErrors(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
testStorageDisks := make([]StorageAPI, 16)
@ -715,7 +715,7 @@ func TestLoadFormatXLErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
xl.storageDisks[11] = nil
@ -745,7 +745,7 @@ func TestLoadFormatXLErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
// disks 0..10 returns disk not found
for i := 0; i <= 10; i++ {
@ -773,7 +773,7 @@ func TestLoadFormatXLErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
// disks 0..10 returns unformatted disk
for i := 0; i <= 10; i++ {
@ -799,7 +799,7 @@ func TestLoadFormatXLErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
// disks 0..15 returns is nil (disk not found)
for i := 0; i < 16; i++ {
@ -812,6 +812,12 @@ func TestLoadFormatXLErrs(t *testing.T) {
// Tests for healFormatXLCorruptedDisks() with cases which lead to errors
func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
root, err := newTestConfig("us-east-1")
if err != nil {
t.Fatal(err)
}
defer removeAll(root)
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {
@ -828,7 +834,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
if err = healFormatXLCorruptedDisks(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
@ -850,7 +856,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
xl.storageDisks[i] = nil
}
@ -874,7 +880,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
posixDisk, ok := xl.storageDisks[0].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
@ -900,7 +906,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = healFormatXLCorruptedDisks(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
@ -922,7 +928,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
@ -948,7 +954,7 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].AppendFile(".minio.sys", "format.json", []byte("corrupted data")); err != nil {
t.Fatal(err)
@ -962,6 +968,12 @@ func TestHealFormatXLCorruptedDisksErrs(t *testing.T) {
// Tests for healFormatXLFreshDisks() with cases which lead to errors
func TestHealFormatXLFreshDisksErrs(t *testing.T) {
root, err := newTestConfig("us-east-1")
if err != nil {
t.Fatal(err)
}
defer removeAll(root)
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {
@ -978,7 +990,7 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
if err = healFormatXLFreshDisks(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
@ -999,7 +1011,7 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
xl.storageDisks[i] = nil
}
@ -1023,7 +1035,7 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
posixDisk, ok := xl.storageDisks[0].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
@ -1049,7 +1061,7 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = healFormatXLFreshDisks(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
@ -1071,7 +1083,7 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
@ -1081,33 +1093,6 @@ func TestHealFormatXLFreshDisksErrs(t *testing.T) {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Remove format.json of all disks
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
}
}
if err = healFormatXLFreshDisks(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
}
// Tests for isFormatFound()

@ -324,3 +324,23 @@ type NotImplemented struct{}
func (e NotImplemented) Error() string {
return "Not Implemented"
}
// Check if error type is ObjectNameInvalid.
func isErrObjectNameInvalid(err error) bool {
err = errorCause(err)
switch err.(type) {
case ObjectNameInvalid:
return true
}
return false
}
// Check if error type is ObjectNotFound.
func isErrObjectNotFound(err error) bool {
err = errorCause(err)
switch err.(type) {
case ObjectNotFound:
return true
}
return false
}
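
A short usage sketch of these helpers, replacing the earlier switch-on-type
pattern; objectExists and its bucket/object arguments are hypothetical, while
ObjectLayer, GetObjectInfo and isErrObjectNotFound come from this codebase:

// objectExists is an illustrative helper only.
func objectExists(objAPI ObjectLayer, bucket, object string) (bool, error) {
    _, err := objAPI.GetObjectInfo(bucket, object)
    if err == nil {
        return true, nil
    }
    if isErrObjectNotFound(err) {
        // errorCause() is applied inside the helper, so traced
        // (wrapped) errors are classified correctly as well.
        return false, nil
    }
    return false, err
}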

@ -710,13 +710,11 @@ func testNonExistantObjectInBucket(obj ObjectLayer, instanceType string, c TestE
if err == nil {
c.Fatalf("%s: Expected error but found nil", instanceType)
}
err = errorCause(err)
switch err := err.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
if err.Error() != "Object not found: bucket#dir1" {
c.Errorf("%s: Expected the Error message to be `%s`, but instead found `%s`", instanceType, "Object not found: bucket#dir1", err.Error())
}
default:
} else {
if err.Error() != "fails" {
c.Errorf("%s: Expected the Error message to be `%s`, but instead found it to be `%s`", instanceType, "fails", err.Error())
}
@ -744,32 +742,37 @@ func testGetDirectoryReturnsObjectNotFound(obj ObjectLayer, instanceType string,
}
_, err = obj.GetObjectInfo("bucket", "dir1")
err = errorCause(err)
switch err := err.(type) {
case ObjectNotFound:
if err.Bucket != "bucket" {
c.Errorf("%s: Expected the bucket name in the error message to be `%s`, but instead found `%s`", instanceType, "bucket", err.Bucket)
if isErrObjectNotFound(err) {
err = errorCause(err)
err1 := err.(ObjectNotFound)
if err1.Bucket != "bucket" {
c.Errorf("%s: Expected the bucket name in the error message to be `%s`, but instead found `%s`",
instanceType, "bucket", err1.Bucket)
}
if err.Object != "dir1" {
c.Errorf("%s: Expected the object name in the error message to be `%s`, but instead found `%s`", instanceType, "dir1", err.Object)
if err1.Object != "dir1" {
c.Errorf("%s: Expected the object name in the error message to be `%s`, but instead found `%s`",
instanceType, "dir1", err1.Object)
}
default:
} else {
if err.Error() != "ObjectNotFound" {
c.Errorf("%s: Expected the error message to be `%s`, but instead found `%s`", instanceType, "ObjectNotFound", err.Error())
c.Errorf("%s: Expected the error message to be `%s`, but instead found `%s`", instanceType,
"ObjectNotFound", err.Error())
}
}
_, err = obj.GetObjectInfo("bucket", "dir1/")
err = errorCause(err)
switch err := err.(type) {
case ObjectNameInvalid:
if err.Bucket != "bucket" {
c.Errorf("%s: Expected the bucket name in the error message to be `%s`, but instead found `%s`", instanceType, "bucket", err.Bucket)
if isErrObjectNameInvalid(err) {
err = errorCause(err)
err1 := err.(ObjectNameInvalid)
if err1.Bucket != "bucket" {
c.Errorf("%s: Expected the bucket name in the error message to be `%s`, but instead found `%s`",
instanceType, "bucket", err1.Bucket)
}
if err.Object != "dir1/" {
c.Errorf("%s: Expected the object name in the error message to be `%s`, but instead found `%s`", instanceType, "dir1/", err.Object)
if err1.Object != "dir1/" {
c.Errorf("%s: Expected the object name in the error message to be `%s`, but instead found `%s`",
instanceType, "dir1/", err1.Object)
}
default:
} else {
// force a failure with a line number.
if err.Error() != "ObjectNotFound" {
c.Errorf("%s: Expected the error message to be `%s`, but instead found `%s`", instanceType, "ObjectNotFound", err.Error())

@ -24,15 +24,6 @@ import (
"github.com/minio/mc/pkg/console"
)
// Channel where minioctl heal handler would notify if it were successful. This
// would be used by waitForFormattingDisks routine to check if it's worth
// retrying loadAllFormats.
var globalWakeupCh chan struct{}
func init() {
globalWakeupCh = make(chan struct{}, 1)
}
/*
Following table lists different possible states the backend could be in.
@ -309,20 +300,5 @@ func waitForFormatDisks(firstDisk bool, endpoints []*url.URL, storageDisks []Sto
}
// Start retry loop retrying until disks are formatted properly, until we have reached
// a conditional quorum of formatted disks.
err = retryFormattingDisks(firstDisk, endpoints, storageDisks)
if err != nil {
return err
}
if firstDisk {
// Notify every one else that they can try init again.
for _, storage := range storageDisks {
switch store := storage.(type) {
// Wake up remote storage servers to initiate init again.
case networkStorage:
var reply GenericReply
_ = store.rpcClient.Call("Storage.TryInitHandler", &GenericArgs{}, &reply)
}
}
}
return nil
return retryFormattingDisks(firstDisk, endpoints, storageDisks)
}

@ -96,10 +96,6 @@ func newRetryTimer(unit time.Duration, cap time.Duration, jitter float64, doneCh
// Attempts starts.
case attemptCh <- nextBackoff:
nextBackoff++
case <-globalWakeupCh:
// Reset nextBackoff to reduce the subsequent wait and re-read
// format.json from all disks again.
nextBackoff = 0
case <-doneCh:
// Stop the routine.
return

@ -212,18 +212,6 @@ func (s *storageServer) RenameFileHandler(args *RenameFileArgs, reply *GenericRe
return s.storage.RenameFile(args.SrcVol, args.SrcPath, args.DstVol, args.DstPath)
}
// TryInitHandler - wake up storage server.
func (s *storageServer) TryInitHandler(args *GenericArgs, reply *GenericReply) error {
if !isRPCTokenValid(args.Token) {
return errInvalidToken
}
go func() {
globalWakeupCh <- struct{}{}
}()
*reply = GenericReply{}
return nil
}
// Initialize new storage rpc.
func newRPCServer(srvConfig serverCmdConfig) (servers []*storageServer, err error) {
for _, ep := range srvConfig.endpoints {

@ -90,23 +90,6 @@ func errorIfInvalidToken(t *testing.T, err error) {
}
}
func TestStorageRPCTryInitHandler(t *testing.T) {
st := createTestStorageServer(t)
defer removeRoots(st.diskDirs)
defer removeAll(st.configDir)
storageRPC := st.stServer
timestamp := time.Now().UTC()
tryArgs := &GenericArgs{
Token: st.token,
Timestamp: timestamp,
}
tryReply := &GenericReply{}
err := storageRPC.TryInitHandler(tryArgs, tryReply)
if err != nil {
t.Errorf("TryInitHandler failed with %s", err)
}
}
func TestStorageRPCInvalidToken(t *testing.T) {
st := createTestStorageServer(t)
defer removeRoots(st.diskDirs)
@ -217,10 +200,4 @@ func TestStorageRPCInvalidToken(t *testing.T) {
renameReply := &GenericReply{}
err = storageRPC.RenameFileHandler(renameArgs, renameReply)
errorIfInvalidToken(t, err)
// 14. TryInitHandler
tryArgs := &badga
tryReply := &GenericReply{}
err = storageRPC.TryInitHandler(tryArgs, tryReply)
errorIfInvalidToken(t, err)
}

@ -1573,7 +1573,7 @@ func initObjectLayer(endpoints []*url.URL) (ObjectLayer, []StorageAPI, error) {
// Disabling the cache for integration tests.
// Should use the object layer tests for validating cache.
if xl, ok := objLayer.(xlObjects); ok {
if xl, ok := objLayer.(*xlObjects); ok {
xl.objCacheEnabled = false
}

@ -272,15 +272,12 @@ func (web *webAPIHandlers) RemoveObject(r *http.Request, args *RemoveObjectArgs,
return &json2.Error{Message: errAuthentication.Error()}
}
if err := objectAPI.DeleteObject(args.BucketName, args.ObjectName); err != nil {
objErr := errorCause(err)
switch objErr.(type) {
case ObjectNotFound:
if isErrObjectNotFound(err) {
// Ignore object not found error.
reply.UIVersion = miniobrowser.UIVersion
return nil
default:
return &json2.Error{Message: err.Error()}
}
return &json2.Error{Message: err.Error()}
}
// Notify object deleted event.

@ -1338,7 +1338,7 @@ func TestWebObjectLayerFaultyDisks(t *testing.T) {
defer removeRoots(fsDirs)
// Set faulty disks to XL backend
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
for i, d := range xl.storageDisks {
xl.storageDisks[i] = newNaughtyDisk(d.(*posix), nil, errFaultyDisk)
}

@ -64,7 +64,7 @@ func (xl xlObjects) MakeBucket(bucket string) error {
// Do we have write quorum?.
if !isDiskQuorum(dErrs, xl.writeQuorum) {
// Purge successfully created buckets if we don't have writeQuorum.
xl.undoMakeBucket(bucket)
undoMakeBucket(xl.storageDisks, bucket)
return toObjectErr(traceError(errXLWriteQuorum), bucket)
}
@ -100,11 +100,11 @@ func (xl xlObjects) undoDeleteBucket(bucket string) {
}
// undo make bucket operation upon quorum failure.
func (xl xlObjects) undoMakeBucket(bucket string) {
func undoMakeBucket(storageDisks []StorageAPI, bucket string) {
// Initialize sync waitgroup.
var wg = &sync.WaitGroup{}
// Undo previous make bucket entry on all underlying storage disks.
for index, disk := range xl.storageDisks {
for index, disk := range storageDisks {
if disk == nil {
continue
}
@ -214,7 +214,7 @@ func (xl xlObjects) listBuckets() (bucketsInfo []BucketInfo, err error) {
})
}
// For buckets info empty, loop once again to check
// if we have, can happen if disks are down.
// if we have, can happen if disks were down.
if len(bucketsInfo) == 0 {
continue
}

@ -16,9 +16,47 @@
package cmd
import "sync"
import (
"fmt"
"path"
"sync"
)
// Heals a bucket if it doesn't exist on one of the disks.
// healFormatXL - heals missing `format.json` on fresh or corrupted
// disks (missing format.json but does have erasure coded data in it).
func healFormatXL(storageDisks []StorageAPI) (err error) {
// Attempt to load all `format.json`.
formatConfigs, sErrs := loadAllFormats(storageDisks)
// Generic format check validates
// if (no quorum) return error
// if (disks not recognized) // Always error.
if err = genericFormatCheck(formatConfigs, sErrs); err != nil {
return err
}
// Handles different cases properly.
switch reduceFormatErrs(sErrs, len(storageDisks)) {
case errCorruptedFormat:
if err = healFormatXLCorruptedDisks(storageDisks); err != nil {
return fmt.Errorf("Unable to repair corrupted format, %s", err)
}
case errSomeDiskUnformatted:
// All drives online but some report missing format.json.
if err = healFormatXLFreshDisks(storageDisks); err != nil {
// There was an unexpected unrecoverable error during healing.
return fmt.Errorf("Unable to heal backend %s", err)
}
case errSomeDiskOffline:
// FIXME: in future.
return fmt.Errorf("Unable to initialize format %s and %s", errSomeDiskOffline, errSomeDiskUnformatted)
}
return nil
}
// Heals a bucket if it doesn't exist on one of the disks, additionally
// also heals the missing entries for bucket metadata files
// `policy.json, notification.xml, listeners.json`.
func (xl xlObjects) HealBucket(bucket string) error {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
@ -30,8 +68,17 @@ func (xl xlObjects) HealBucket(bucket string) error {
return traceError(BucketNotFound{Bucket: bucket})
}
// Heal bucket - create buckets on disks where it does not exist.
// Heal bucket.
if err := healBucket(xl.storageDisks, bucket, xl.writeQuorum); err != nil {
return err
}
// Proceed to heal bucket metadata.
return healBucketMetadata(xl.storageDisks, bucket)
}
func healBucket(storageDisks []StorageAPI, bucket string, writeQuorum int) error {
// Heal bucket - create buckets on disks where it does not exist.
bucketLock := nsMutex.NewNSLock(bucket, "")
bucketLock.Lock()
defer bucketLock.Unlock()
@ -40,10 +87,10 @@ func (xl xlObjects) HealBucket(bucket string) error {
var wg = &sync.WaitGroup{}
// Initialize list of errors.
var dErrs = make([]error, len(xl.storageDisks))
var dErrs = make([]error, len(storageDisks))
// Make a volume entry on all underlying storage disks.
for index, disk := range xl.storageDisks {
for index, disk := range storageDisks {
if disk == nil {
dErrs[index] = traceError(errDiskNotFound)
continue
@ -68,9 +115,9 @@ func (xl xlObjects) HealBucket(bucket string) error {
wg.Wait()
// Do we have write quorum?.
if !isDiskQuorum(dErrs, xl.writeQuorum) {
if !isDiskQuorum(dErrs, writeQuorum) {
// Purge successfully created buckets if we don't have writeQuorum.
xl.undoMakeBucket(bucket)
undoMakeBucket(storageDisks, bucket)
return toObjectErr(traceError(errXLWriteQuorum), bucket)
}
@ -85,26 +132,101 @@ func (xl xlObjects) HealBucket(bucket string) error {
return nil
}
// HealObject heals a given object for all its missing entries.
// FIXME: If an object was deleted and one disk was down, and later the disk comes back
// up again, heal on the object should delete it.
func (xl xlObjects) HealObject(bucket, object string) error {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return traceError(BucketNameInvalid{Bucket: bucket})
// Heals all the metadata associated for a given bucket, this function
// heals `policy.json`, `notification.xml` and `listeners.json`.
func healBucketMetadata(storageDisks []StorageAPI, bucket string) error {
healBucketMetaFn := func(metaPath string) error {
metaLock := nsMutex.NewNSLock(minioMetaBucket, metaPath)
metaLock.RLock()
defer metaLock.RUnlock()
// Heals the metaPath.
if err := healObject(storageDisks, minioMetaBucket, metaPath); err != nil && !isErrObjectNotFound(err) {
return err
} // Success.
return nil
}
// Verify if object is valid.
if !IsValidObjectName(object) {
return traceError(ObjectNameInvalid{Bucket: bucket, Object: object})
// Heal `policy.json` for missing entries, ignores if `policy.json` is not found.
policyPath := pathJoin(bucketConfigPrefix, bucket, policyJSON)
if err := healBucketMetaFn(policyPath); err != nil {
return err
}
// Lock the object before healing.
objectLock := nsMutex.NewNSLock(bucket, object)
objectLock.RLock()
defer objectLock.RUnlock()
// Heal `notification.xml` for missing entries, ignores if `notification.xml` is not found.
nConfigPath := path.Join(bucketConfigPrefix, bucket, bucketNotificationConfig)
if err := healBucketMetaFn(nConfigPath); err != nil {
return err
}
partsMetadata, errs := readAllXLMetadata(xl.storageDisks, bucket, object)
// Heal `listeners.json` for missing entries, ignores if `listeners.json` is not found.
lConfigPath := path.Join(bucketConfigPrefix, bucket, bucketListenerConfig)
return healBucketMetaFn(lConfigPath)
}
// listBucketNames list all bucket names from all disks to heal.
func listBucketNames(storageDisks []StorageAPI) (bucketNames map[string]struct{}, err error) {
bucketNames = make(map[string]struct{})
for _, disk := range storageDisks {
if disk == nil {
continue
}
var volsInfo []VolInfo
volsInfo, err = disk.ListVols()
if err == nil {
for _, volInfo := range volsInfo {
// StorageAPI can send volume names which are
// incompatible with buckets, handle it and skip them.
if !IsValidBucketName(volInfo.Name) {
continue
}
// Ignore the volume special bucket.
if volInfo.Name == minioMetaBucket {
continue
}
bucketNames[volInfo.Name] = struct{}{}
}
continue
}
// Ignore any disks not found.
if isErrIgnored(err, bucketMetadataOpIgnoredErrs) {
continue
}
break
}
return bucketNames, err
}
// This function is meant for all the healing that needs to be done
// during startup i.e healing of buckets, bucket metadata (policy.json,
// notification.xml, listeners.json) etc. Currently this function
// supports quick healing of buckets, bucket metadata.
//
// TODO :-
// - add support for healing dangling `uploads.json`.
// - add support for healing dangling `xl.json`.
func quickHeal(storageDisks []StorageAPI, writeQuorum int) error {
// List all bucket names from all disks.
bucketNames, err := listBucketNames(storageDisks)
if err != nil {
return err
}
// All bucket names and bucket metadata should be healed.
for bucketName := range bucketNames {
// Heal bucket and then proceed to heal bucket metadata.
if err = healBucket(storageDisks, bucketName, writeQuorum); err == nil {
if err = healBucketMetadata(storageDisks, bucketName); err == nil {
continue
}
return err
}
return err
}
return nil
}
// Heals an object only the corrupted/missing erasure blocks.
func healObject(storageDisks []StorageAPI, bucket string, object string) error {
partsMetadata, errs := readAllXLMetadata(storageDisks, bucket, object)
if err := reduceErrs(errs, nil); err != nil {
return toObjectErr(err, bucket, object)
}
@ -115,9 +237,9 @@ func (xl xlObjects) HealObject(bucket, object string) error {
}
// List of disks having latest version of the object.
latestDisks, modTime := listOnlineDisks(xl.storageDisks, partsMetadata, errs)
latestDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs)
// List of disks having outdated version of the object or missing object.
outDatedDisks := outDatedDisks(xl.storageDisks, partsMetadata, errs)
outDatedDisks := outDatedDisks(storageDisks, partsMetadata, errs)
// Latest xlMetaV1 for reference.
latestMeta := pickValidXLMeta(partsMetadata, modTime)
@ -217,3 +339,27 @@ func (xl xlObjects) HealObject(bucket, object string) error {
}
return nil
}
// HealObject heals a given object for all its missing entries.
// FIXME: If an object was deleted and one disk was down,
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(bucket, object string) error {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return traceError(BucketNameInvalid{Bucket: bucket})
}
// Verify if object is valid.
if !IsValidObjectName(object) {
return traceError(ObjectNameInvalid{Bucket: bucket, Object: object})
}
// Lock the object before healing.
objectLock := nsMutex.NewNSLock(bucket, object)
objectLock.RLock()
defer objectLock.RUnlock()
// Heal the object.
return healObject(xl.storageDisks, bucket, object)
}
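
The exported entry points remain HealBucket and HealObject on the XL object
layer; a minimal caller sketch, assuming an XL backend and using placeholder
bucket/object names (exampleHeal itself is hypothetical):

func exampleHeal(objAPI ObjectLayer) error {
    xl, ok := objAPI.(*xlObjects)
    if !ok {
        // The healing shown here is only wired up for the XL backend.
        return traceError(NotImplemented{})
    }
    // Heal the bucket itself plus policy.json, notification.xml, listeners.json.
    if err := xl.HealBucket("mybucket"); err != nil {
        return err
    }
    // Heal a single object's missing or outdated erasure-coded parts.
    return xl.HealObject("mybucket", "myobject")
}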

cmd/xl-v1-healing_test.go (new file, 425 lines)

@ -0,0 +1,425 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"fmt"
"testing"
)
// Tests healing of format XL.
func TestHealFormatXL(t *testing.T) {
root, err := newTestConfig("us-east-1")
if err != nil {
t.Fatal(err)
}
defer removeAll(root)
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err := parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Everything is fine, should return nil
obj, _, err := initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl := obj.(*xlObjects)
if err = healFormatXL(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Disks 0..15 are nil
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
xl.storageDisks[i] = nil
}
if err = healFormatXL(xl.storageDisks); err != errXLReadQuorum {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// All disks return errDiskFull.
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := range xl.storageDisks {
posixDisk, ok := xl.storageDisks[i].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
}
xl.storageDisks[i] = newNaughtyDisk(posixDisk, nil, errDiskFull)
}
if err = healFormatXL(xl.storageDisks); err != errXLReadQuorum {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// One disk is not found, heal corrupted disks should return nil
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = healFormatXL(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Remove format.json of all disks
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
}
}
if err = healFormatXL(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Corrupt format.json on all disks.
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := 0; i <= 15; i++ {
if err = xl.storageDisks[i].AppendFile(".minio.sys", "format.json", []byte("corrupted data")); err != nil {
t.Fatal(err)
}
}
if err = healFormatXL(xl.storageDisks); err == nil {
t.Fatal("Should get a json parsing error, ")
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Remove format.json on 3 disks.
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := 0; i <= 2; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
}
}
if err = healFormatXL(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Remove format.json on 3 disks and take one more disk offline; expect the offline/unformatted error.
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
for i := 0; i <= 2; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
}
}
posixDisk, ok := xl.storageDisks[3].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
}
xl.storageDisks[3] = newNaughtyDisk(posixDisk, nil, errDiskNotFound)
expectedErr := fmt.Errorf("Unable to initialize format %s and %s", errSomeDiskOffline, errSomeDiskUnformatted)
if err = healFormatXL(xl.storageDisks); err != nil {
if err.Error() != expectedErr.Error() {
t.Fatal("Got an unexpected error: ", err)
}
}
removeRoots(fsDirs)
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Remove format.json on 3 disks after creating a bucket; heal should succeed.
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
if err = obj.MakeBucket(getRandomBucketName()); err != nil {
t.Fatal(err)
}
for i := 0; i <= 2; i++ {
if err = xl.storageDisks[i].DeleteFile(".minio.sys", "format.json"); err != nil {
t.Fatal(err)
}
}
if err = healFormatXL(xl.storageDisks); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
removeRoots(fsDirs)
}
// Tests undoMakeBucket() and validates that the undo completes successfully.
func TestUndoMakeBucket(t *testing.T) {
root, err := newTestConfig("us-east-1")
if err != nil {
t.Fatal(err)
}
defer removeAll(root)
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
defer removeRoots(fsDirs)
endpoints, err := parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Initialize the object layer.
obj, _, err := initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
bucketName := getRandomBucketName()
if err = obj.MakeBucket(bucketName); err != nil {
t.Fatal(err)
}
xl := obj.(*xlObjects)
undoMakeBucket(xl.storageDisks, bucketName)
// Validate if bucket was deleted properly.
_, err = obj.GetBucketInfo(bucketName)
if err != nil {
err = errorCause(err)
switch err.(type) {
case BucketNotFound:
default:
t.Fatal(err)
}
}
}
// Tests quick healing of bucket and bucket metadata.
func TestQuickHeal(t *testing.T) {
root, err := newTestConfig("us-east-1")
if err != nil {
t.Fatal(err)
}
defer removeAll(root)
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
defer removeRoots(fsDirs)
endpoints, err := parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// Initialize the object layer.
obj, _, err := initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
bucketName := getRandomBucketName()
if err = obj.MakeBucket(bucketName); err != nil {
t.Fatal(err)
}
xl := obj.(*xlObjects)
for i := 0; i <= 2; i++ {
if err = xl.storageDisks[i].DeleteVol(bucketName); err != nil {
t.Fatal(err)
}
}
// Heal the missing buckets.
if err = quickHeal(xl.storageDisks, xl.writeQuorum); err != nil {
t.Fatal(err)
}
// Validate if buckets were indeed healed.
for i := 0; i <= 2; i++ {
if _, err = xl.storageDisks[i].StatVol(bucketName); err != nil {
t.Fatal(err)
}
}
// Corrupt one of the disks to return unformatted disk.
posixDisk, ok := xl.storageDisks[0].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errUnformattedDisk)
if err = quickHeal(xl.storageDisks, xl.writeQuorum); err != errUnformattedDisk {
t.Fatal(err)
}
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
defer removeRoots(fsDirs)
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// One disk is not found, heal corrupted disks should return nil
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
xl.storageDisks[0] = nil
if err = quickHeal(xl.storageDisks, xl.writeQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
fsDirs, err = getRandomDisks(nDisks)
if err != nil {
t.Fatal(err)
}
defer removeRoots(fsDirs)
endpoints, err = parseStorageEndpoints(fsDirs)
if err != nil {
t.Fatal(err)
}
// One disk is not found, heal corrupted disks should return nil
obj, _, err = initObjectLayer(endpoints)
if err != nil {
t.Fatal(err)
}
xl = obj.(*xlObjects)
// Corrupt one of the disks to return unformatted disk.
posixDisk, ok = xl.storageDisks[0].(*posix)
if !ok {
t.Fatal("storage disk is not *posix type")
}
xl.storageDisks[0] = newNaughtyDisk(posixDisk, nil, errDiskNotFound)
if err = quickHeal(xl.storageDisks, xl.writeQuorum); err != nil {
t.Fatal("Got an unexpected error: ", err)
}
}
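
These tests spin up 16 temporary disks per case; assuming the standard Go
toolchain and this repository layout, one way to run only the new healing
tests locally would be `go test -v -run 'TestHealFormatXL|TestUndoMakeBucket|TestQuickHeal' ./cmd`.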

@ -52,7 +52,7 @@ func TestUpdateUploadJSON(t *testing.T) {
{uploadIDChange{uploadID: "111abc", isRemove: true}, nil},
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
for i, test := range testCases {
testErrVal := xl.updateUploadJSON(bucket, object, test.uCh)
if testErrVal != test.errVal {

@ -119,7 +119,7 @@ func TestXLDeleteObjectDiskNotFound(t *testing.T) {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
// Create "bucket"
err = obj.MakeBucket("bucket")
@ -169,7 +169,7 @@ func TestGetObjectNoQuorum(t *testing.T) {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
// Create "bucket"
err = obj.MakeBucket("bucket")
@ -221,7 +221,7 @@ func TestPutObjectNoQuorum(t *testing.T) {
t.Fatal(err)
}
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
// Create "bucket"
err = obj.MakeBucket("bucket")
@ -272,7 +272,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}
defer removeRoots(fsDirs)
xl := obj.(xlObjects)
xl := obj.(*xlObjects)
// Create "bucket"
err = obj.MakeBucket("bucket")

@ -21,6 +21,7 @@ import (
"os"
"sort"
"strings"
"sync"
"github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/objcache"
@ -52,6 +53,7 @@ const (
// xlObjects - Implements XL object layer.
type xlObjects struct {
mutex *sync.Mutex
storageDisks []StorageAPI // Collection of initialized backend disks.
dataBlocks int // dataBlocks count calculated for erasure.
parityBlocks int // parityBlocks count calculated for erasure.
@ -77,36 +79,6 @@ var xlTreeWalkIgnoredErrs = []error{
errFaultyDisk,
}
func healFormatXL(storageDisks []StorageAPI) error {
// Attempt to load all `format.json`.
formatConfigs, sErrs := loadAllFormats(storageDisks)
// Generic format check validates
// if (no quorum) return error
// if (disks not recognized) // Always error.
if err := genericFormatCheck(formatConfigs, sErrs); err != nil {
return err
}
// Handles different cases properly.
switch reduceFormatErrs(sErrs, len(storageDisks)) {
case errCorruptedFormat:
if err := healFormatXLCorruptedDisks(storageDisks); err != nil {
return fmt.Errorf("Unable to repair corrupted format, %s", err)
}
case errSomeDiskUnformatted:
// All drives online but some report missing format.json.
if err := healFormatXLFreshDisks(storageDisks); err != nil {
// There was an unexpected unrecoverable error during healing.
return fmt.Errorf("Unable to heal backend %s", err)
}
case errSomeDiskOffline:
// FIXME: in future.
return fmt.Errorf("Unable to initialize format %s and %s", errSomeDiskOffline, errSomeDiskUnformatted)
}
return nil
}
// newXLObjects - initialize new xl object layer.
func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
if storageDisks == nil {
@ -135,7 +107,8 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
objCacheDisabled := strings.EqualFold(os.Getenv("_MINIO_CACHE"), "off")
// Initialize xl objects.
xl := xlObjects{
xl := &xlObjects{
mutex: &sync.Mutex{},
storageDisks: newStorageDisks,
dataBlocks: dataBlocks,
parityBlocks: parityBlocks,
@ -149,6 +122,11 @@ func newXLObjects(storageDisks []StorageAPI) (ObjectLayer, error) {
xl.readQuorum = readQuorum
xl.writeQuorum = writeQuorum
// Do a quick heal on the buckets themselves for any discrepancies.
if err := quickHeal(xl.storageDisks, xl.writeQuorum); err != nil {
return xl, err
}
// Return successfully initialized object layer.
return xl, nil
}
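
Because newXLObjects now returns a pointer (&xlObjects{...}), every type
assertion in the tests above changes from obj.(xlObjects) to obj.(*xlObjects).
A small sketch of why the pointer form matters (asXL is hypothetical):

// asXL extracts the XL backend from a generic ObjectLayer.
func asXL(objLayer ObjectLayer) (*xlObjects, bool) {
    xl, ok := objLayer.(*xlObjects) // the value-type assertion would no longer match
    if !ok {
        return nil, false
    }
    // Field writes such as xl.objCacheEnabled = false now mutate the shared
    // instance rather than a copy, which is what the updated tests rely on.
    return xl, true
}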