From d9db7f3308f599b33c9813106dd7d0ccafeb391e Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 24 Oct 2020 13:23:16 -0700 Subject: [PATCH] expire lockers if lockers are offline (#10749) lockers currently might leave stale locks behind in unknown ways while waiting for downed lockers. The locker check interval is frequent enough to safely clean up stale locks. --- cmd/admin-handlers.go | 14 +++++--------- cmd/erasure.go | 4 +++- cmd/local-locker.go | 4 ++++ cmd/lock-rest-client.go | 2 ++ cmd/lock-rest-server-common.go | 6 +++++- cmd/lock-rest-server.go | 23 ++++++++++------------- pkg/dsync/drwmutex.go | 1 + pkg/dsync/rpc-client-interface.go | 3 +++ pkg/madmin/top-commands.go | 3 ++- 9 files changed, 35 insertions(+), 25 deletions(-) diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index 3ab0b6cd6..e808ffc48 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -348,7 +348,7 @@ func (a adminAPIHandlers) DataUsageInfoHandler(w http.ResponseWriter, r *http.Re writeSuccessResponseJSON(w, dataUsageInfoJSON) } -func lriToLockEntry(l lockRequesterInfo, resource, server string, rquorum, wquorum int) *madmin.LockEntry { +func lriToLockEntry(l lockRequesterInfo, resource, server string) *madmin.LockEntry { entry := &madmin.LockEntry{ Timestamp: l.Timestamp, Resource: resource, @@ -356,18 +356,17 @@ func lriToLockEntry(l lockRequesterInfo, resource, server string, rquorum, wquor Source: l.Source, Owner: l.Owner, ID: l.UID, + Quorum: l.Quorum, } if l.Writer { entry.Type = "WRITE" - entry.Quorum = wquorum } else { entry.Type = "READ" - entry.Quorum = rquorum } return entry } -func topLockEntries(peerLocks []*PeerLocks, rquorum, wquorum int, stale bool) madmin.LockEntries { +func topLockEntries(peerLocks []*PeerLocks, stale bool) madmin.LockEntries { entryMap := make(map[string]*madmin.LockEntry) for _, peerLock := range peerLocks { if peerLock == nil { @@ -379,7 +378,7 @@ func topLockEntries(peerLocks []*PeerLocks, rquorum, wquorum int, stale bool) ma if val, ok := 
entryMap[lockReqInfo.UID]; ok { val.ServerList = append(val.ServerList, peerLock.Addr) } else { - entryMap[lockReqInfo.UID] = lriToLockEntry(lockReqInfo, k, peerLock.Addr, rquorum, wquorum) + entryMap[lockReqInfo.UID] = lriToLockEntry(lockReqInfo, k, peerLock.Addr) } } } @@ -429,10 +428,7 @@ func (a adminAPIHandlers) TopLocksHandler(w http.ResponseWriter, r *http.Request peerLocks := globalNotificationSys.GetLocks(ctx, r) - rquorum := getReadQuorum(objectAPI.SetDriveCount()) - wquorum := getWriteQuorum(objectAPI.SetDriveCount()) - - topLocks := topLockEntries(peerLocks, rquorum, wquorum, stale) + topLocks := topLockEntries(peerLocks, stale) // Marshal API response upto requested count. if len(topLocks) > count && count > 0 { diff --git a/cmd/erasure.go b/cmd/erasure.go index f3705916d..7ea53fa7a 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -178,7 +178,9 @@ func getDisksInfo(disks []StorageAPI, endpoints []string) (disksInfo []madmin.Di } } - if len(disksInfo) == rootDiskCount { + // Count offline disks as well to ensure consistent + // reportability of offline drives on local setups. + if len(disksInfo) == (rootDiskCount + offlineDisks.Sum()) { // Success. return disksInfo, errs, onlineDisks, offlineDisks } diff --git a/cmd/local-locker.go b/cmd/local-locker.go index c3140c3f1..1b0731cdb 100644 --- a/cmd/local-locker.go +++ b/cmd/local-locker.go @@ -35,6 +35,8 @@ type lockRequesterInfo struct { // Owner represents the UUID of the owner who originally requested the lock // useful in expiry. Owner string + // Quorum represents the quorum required for this lock to be active. + Quorum int } // isWriteLock returns whether the lock is a write or read lock. 
@@ -96,6 +98,7 @@ func (l *localLocker) Lock(ctx context.Context, args dsync.LockArgs) (reply bool UID: args.UID, Timestamp: UTCNow(), TimeLastCheck: UTCNow(), + Quorum: args.Quorum, }, } } @@ -153,6 +156,7 @@ func (l *localLocker) RLock(ctx context.Context, args dsync.LockArgs) (reply boo UID: args.UID, Timestamp: UTCNow(), TimeLastCheck: UTCNow(), + Quorum: args.Quorum, } resource := args.Resources[0] if lri, ok := l.lockMap[resource]; ok { diff --git a/cmd/lock-rest-client.go b/cmd/lock-rest-client.go index c89eb2945..7211867fe 100644 --- a/cmd/lock-rest-client.go +++ b/cmd/lock-rest-client.go @@ -23,6 +23,7 @@ import ( "errors" "io" "net/url" + "strconv" "github.com/minio/minio/cmd/http" xhttp "github.com/minio/minio/cmd/http" @@ -93,6 +94,7 @@ func (client *lockRESTClient) restCall(ctx context.Context, call string, args ds values.Set(lockRESTUID, args.UID) values.Set(lockRESTOwner, args.Owner) values.Set(lockRESTSource, args.Source) + values.Set(lockRESTQuorum, strconv.Itoa(args.Quorum)) var buffer bytes.Buffer for _, resource := range args.Resources { buffer.WriteString(resource) diff --git a/cmd/lock-rest-server-common.go b/cmd/lock-rest-server-common.go index 0afec2ad2..33eac9b5c 100644 --- a/cmd/lock-rest-server-common.go +++ b/cmd/lock-rest-server-common.go @@ -21,7 +21,7 @@ import ( ) const ( - lockRESTVersion = "v3" + lockRESTVersion = "v4" // Add Quorum query param lockRESTVersionPrefix = SlashSeparator + lockRESTVersion lockRESTPrefix = minioReservedBucketPath + "/lock" ) @@ -43,6 +43,10 @@ const ( // Source contains the line number, function and file name of the code // on the client node that requested the lock. 
lockRESTSource = "source" + + // Quorum value to be saved along lock requester info, useful + // in verifying stale locks + lockRESTQuorum = "quorum" ) var ( diff --git a/cmd/lock-rest-server.go b/cmd/lock-rest-server.go index d65d955bd..722f0d318 100644 --- a/cmd/lock-rest-server.go +++ b/cmd/lock-rest-server.go @@ -24,6 +24,7 @@ import ( "net/http" "path" "sort" + "strconv" "time" "github.com/gorilla/mux" @@ -32,10 +33,10 @@ import ( const ( // Lock maintenance interval. - lockMaintenanceInterval = 30 * time.Second + lockMaintenanceInterval = 15 * time.Second // Lock validity check interval. - lockValidityCheckInterval = 30 * time.Second + lockValidityCheckInterval = 5 * time.Second ) // To abstract a node over network. @@ -63,10 +64,16 @@ func (l *lockRESTServer) IsValid(w http.ResponseWriter, r *http.Request) bool { } func getLockArgs(r *http.Request) (args dsync.LockArgs, err error) { + quorum, err := strconv.Atoi(r.URL.Query().Get(lockRESTQuorum)) + if err != nil { + return args, err + } + args = dsync.LockArgs{ Owner: r.URL.Query().Get(lockRESTOwner), UID: r.URL.Query().Get(lockRESTUID), Source: r.URL.Query().Get(lockRESTSource), + Quorum: quorum, } var resources []string @@ -277,12 +284,8 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error { for lendpoint, nlrips := range getLongLivedLocks(interval) { nlripsMap := make(map[string]nlock, len(nlrips)) for _, nlrip := range nlrips { - // Locks are only held on first zone, make sure that - // we only look for ownership of locks from endpoints - // on first zone. 
for _, c := range allLockersFn() { if !c.IsOnline() || c == nil { - updateNlocks(nlripsMap, nlrip.name, nlrip.lri.Writer) continue } @@ -306,14 +309,8 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error { } } - // Read locks we assume quorum for be N/2 success - quorum := getReadQuorum(objAPI.SetDriveCount()) - if nlrip.lri.Writer { - quorum = getWriteQuorum(objAPI.SetDriveCount()) - } - // less than the quorum, we have locks expired. - if nlripsMap[nlrip.name].locks < quorum { + if nlripsMap[nlrip.name].locks < nlrip.lri.Quorum { // The lock is no longer active at server that originated // the lock, attempt to remove the lock. globalLockServers[lendpoint].mutex.Lock() diff --git a/pkg/dsync/drwmutex.go b/pkg/dsync/drwmutex.go index c0ac024ce..eff41e331 100644 --- a/pkg/dsync/drwmutex.go +++ b/pkg/dsync/drwmutex.go @@ -241,6 +241,7 @@ func lock(ctx context.Context, ds *Dsync, locks *[]string, id, source string, is UID: id, Resources: lockNames, Source: source, + Quorum: quorum, } var locked bool diff --git a/pkg/dsync/rpc-client-interface.go b/pkg/dsync/rpc-client-interface.go index f385ad401..eb96408c3 100644 --- a/pkg/dsync/rpc-client-interface.go +++ b/pkg/dsync/rpc-client-interface.go @@ -33,6 +33,9 @@ type LockArgs struct { // Owner represents unique ID for this instance, an owner who originally requested // the locked resource, useful primarily in figuring our stale locks. Owner string + + // Quorum represents the expected quorum for this lock type. + Quorum int } // NetLocker is dsync compatible locker interface. diff --git a/pkg/madmin/top-commands.go b/pkg/madmin/top-commands.go index 80bd47df5..319baf985 100644 --- a/pkg/madmin/top-commands.go +++ b/pkg/madmin/top-commands.go @@ -38,7 +38,8 @@ type LockEntry struct { ServerList []string `json:"serverlist"` // List of servers participating in the lock. Owner string `json:"owner"` // Owner UUID indicates server owns the lock. 
ID string `json:"id"` // UID to uniquely identify request of client. - Quorum int `json:"quorum"` // represents quorum number of servers required to hold this lock, used to look for stale locks. + // Represents quorum number of servers required to hold this lock, used to look for stale locks. + Quorum int `json:"quorum"` } // LockEntries - To sort the locks