From 7fdffa03638bbfc008631c404578b0d191f02792 Mon Sep 17 00:00:00 2001
From: Anis Elleuch
Date: Fri, 10 Sep 2021 16:53:46 +0100
Subject: [PATCH] locks: Ensure local lock removal after a failed refresh (#12979) (#13183)

When a lock is not refreshed in the cluster, it is automatically removed
by the subsequent cleanup routine for non-refreshed locks, but the local
server was never cleaned, so the same stale locks remained there.

This commit removes the lock locally as well as on remote nodes; if
removing a lock from a remote node fails, the lock will still be removed
later by the locks cleanup routine.
---
 cmd/local-locker.go   | 21 ++++++++++++++++-----
 pkg/dsync/drwmutex.go | 27 +++++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/cmd/local-locker.go b/cmd/local-locker.go
index 8082a8e5d..187a812c7 100644
--- a/cmd/local-locker.go
+++ b/cmd/local-locker.go
@@ -222,13 +222,24 @@ func (l *localLocker) ForceUnlock(ctx context.Context, args dsync.LockArgs) (rep
 	default:
 		l.mutex.Lock()
 		defer l.mutex.Unlock()
-		if len(args.UID) != 0 {
-			return false, fmt.Errorf("ForceUnlock called with non-empty UID: %s", args.UID)
+		if len(args.UID) == 0 {
+			for _, resource := range args.Resources {
+				delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock)
+			}
+			return true, nil
 		}
-		for _, resource := range args.Resources {
-			delete(l.lockMap, resource) // Remove the lock (irrespective of write or read lock)
+
+		lockUIDFound := false
+
+		for resource, lris := range l.lockMap {
+			for _, lri := range lris {
+				if lri.UID == args.UID {
+					l.removeEntry(resource, dsync.LockArgs{Owner: lri.Owner, UID: lri.UID}, &lris)
+					lockUIDFound = true
+				}
+			}
 		}
-		return true, nil
+		return lockUIDFound, nil
 	}
 }
 
diff --git a/pkg/dsync/drwmutex.go b/pkg/dsync/drwmutex.go
index d1083d63a..99e671e8f 100644
--- a/pkg/dsync/drwmutex.go
+++ b/pkg/dsync/drwmutex.go
@@ -50,6 +50,9 @@ const drwMutexUnlockCallTimeout = 30 * time.Second
 // dRWMutexRefreshTimeout - timeout for the refresh call
 const drwMutexRefreshTimeout = 5 * time.Second
 
+// dRWMutexForceUnlockTimeout - timeout for the unlock call
+const drwMutexForceUnlockCallTimeout = 30 * time.Second
+
 // dRWMutexRefreshInterval - the interval between two refresh calls
 const drwMutexRefreshInterval = 10 * time.Second
 
@@ -239,6 +242,9 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc
 			refreshTimer.Reset(drwMutexRefreshInterval)
 			refreshed, err := refresh(ctx, dm.clnt, id, source, quorum, dm.Names...)
 			if err == nil && !refreshed {
+				// Clean the lock locally and in remote nodes
+				forceUnlock(ctx, dm.clnt, id)
+				// Execute the caller lock loss callback
 				if lockLossCallback != nil {
 					lockLossCallback()
 				}
@@ -249,6 +255,27 @@ func (dm *DRWMutex) startContinousLockRefresh(lockLossCallback func(), id, sourc
 	}()
 }
 
+func forceUnlock(ctx context.Context, ds *Dsync, id string) {
+	ctx, cancel := context.WithTimeout(ctx, drwMutexForceUnlockCallTimeout)
+	defer cancel()
+
+	restClnts, _ := ds.GetLockers()
+
+	var wg sync.WaitGroup
+	for index, c := range restClnts {
+		wg.Add(1)
+		// Send refresh request to all nodes
+		go func(index int, c NetLocker) {
+			defer wg.Done()
+			args := LockArgs{
+				UID: id,
+			}
+			c.ForceUnlock(ctx, args)
+		}(index, c)
+	}
+	wg.Wait()
+}
+
 type refreshResult struct {
 	offline   bool
 	succeeded bool
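
The sketch below is not part of the patch; it is a minimal, self-contained Go
illustration of the best-effort fan-out pattern that forceUnlock above relies
on: broadcast the unlock to every locker under a bounded timeout, wait for all
goroutines, and deliberately ignore per-node failures because the periodic
lock cleanup routine reclaims any leftovers. The Locker interface, fakeLocker
type, and broadcastForceUnlock function are hypothetical names for this
example, not MinIO APIs.

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// Locker is a hypothetical stand-in for a remote lock endpoint (in the patch
// this role is played by the NetLocker clients returned by GetLockers()).
type Locker interface {
	ForceUnlock(ctx context.Context, uid string) error
}

// fakeLocker simulates a node that always succeeds.
type fakeLocker struct{ name string }

func (f fakeLocker) ForceUnlock(ctx context.Context, uid string) error {
	fmt.Printf("%s: force-unlocked %s\n", f.name, uid)
	return nil
}

// broadcastForceUnlock fans a best-effort unlock out to every locker, bounded
// by a timeout. Errors from individual nodes are ignored on purpose: any lock
// entry that survives here is expected to be reclaimed by a later cleanup pass.
func broadcastForceUnlock(ctx context.Context, lockers []Locker, uid string) {
	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
	defer cancel()

	var wg sync.WaitGroup
	for _, l := range lockers {
		wg.Add(1)
		go func(l Locker) {
			defer wg.Done()
			_ = l.ForceUnlock(ctx, uid) // best effort, result intentionally ignored
		}(l)
	}
	wg.Wait()
}

func main() {
	lockers := []Locker{fakeLocker{name: "node-1"}, fakeLocker{name: "node-2"}}
	broadcastForceUnlock(context.Background(), lockers, "lock-uid-1234")
}

Waiting on the WaitGroup rather than firing and forgetting keeps the call
bounded by the timeout while still ensuring every node was at least attempted
before the caller's lock-loss callback runs, which mirrors the ordering in
startContinousLockRefresh above.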