fix: make sure to use uniform drive count calculation (#10208)

It is possible in situations when server was deployed
in asymmetric configuration in the past such as

```
minio server ~/fs{1...4}/disk{1...5}
```

Results in setDriveCount of 10 in older releases
but with fairly recent releases we have moved to
having server affinity which means that a set drive
count ascertained from above config will be now '4'

While the object layer make sure that we honor
`format.json` the storageClass configuration however
was by mistake was using the global value obtained
by heuristics. Which leads to prematurely using
lower parity without being requested by the an
administrator.

This PR fixes this behavior.
This commit is contained in:
Harshavardhana 2020-08-05 13:31:12 -07:00 committed by GitHub
parent e656beb915
commit a20d4568a2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 64 additions and 39 deletions

View file

@ -136,7 +136,7 @@ func (a adminAPIHandlers) SetConfigKVHandler(w http.ResponseWriter, r *http.Requ
return
}
if err = validateConfig(cfg); err != nil {
if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil {
writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL)
return
}
@ -271,7 +271,7 @@ func (a adminAPIHandlers) RestoreConfigHistoryKVHandler(w http.ResponseWriter, r
return
}
if err = validateConfig(cfg); err != nil {
if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil {
writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL)
return
}
@ -383,7 +383,7 @@ func (a adminAPIHandlers) SetConfigHandler(w http.ResponseWriter, r *http.Reques
return
}
if err = validateConfig(cfg); err != nil {
if err = validateConfig(cfg, objectAPI.SetDriveCount()); err != nil {
writeCustomErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrAdminConfigBadJSON), err.Error(), r.URL)
return
}

View file

@ -213,7 +213,7 @@ var (
globalServerConfigMu sync.RWMutex
)
func validateConfig(s config.Config) error {
func validateConfig(s config.Config, setDriveCount int) error {
// Disable merging env values with config for validation.
env.SetEnvOff()
@ -233,8 +233,7 @@ func validateConfig(s config.Config) error {
}
if globalIsErasure {
if _, err := storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default],
globalErasureSetDriveCount); err != nil {
if _, err := storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], setDriveCount); err != nil {
return err
}
}
@ -311,7 +310,7 @@ func validateConfig(s config.Config) error {
globalNotificationSys.ConfiguredTargetIDs())
}
func lookupConfigs(s config.Config) {
func lookupConfigs(s config.Config, setDriveCount int) {
ctx := GlobalContext
var err error
@ -382,8 +381,7 @@ func lookupConfigs(s config.Config) {
globalAPIConfig.init(apiConfig)
if globalIsErasure {
globalStorageClass, err = storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default],
globalErasureSetDriveCount)
globalStorageClass, err = storageclass.LookupConfig(s[config.StorageClassSubSys][config.Default], setDriveCount)
if err != nil {
logger.LogIf(ctx, fmt.Errorf("Unable to initialize storage class config: %w", err))
}
@ -601,7 +599,7 @@ func loadConfig(objAPI ObjectLayer) error {
}
// Override any values from ENVs.
lookupConfigs(srvCfg)
lookupConfigs(srvCfg, objAPI.SetDriveCount())
// hold the mutex lock before a new config is assigned.
globalServerConfigMu.Lock()

View file

@ -329,28 +329,28 @@ var (
// CreateServerEndpoints - validates and creates new endpoints from input args, supports
// both ellipses and without ellipses transparently.
func createServerEndpoints(serverAddr string, args ...string) (
endpointZones EndpointZones, setDriveCount int,
setupType SetupType, err error) {
endpointZones EndpointZones, setupType SetupType, err error) {
if len(args) == 0 {
return nil, -1, -1, errInvalidArgument
return nil, -1, errInvalidArgument
}
var setDriveCount int
if v := env.Get(EnvErasureSetDriveCount, ""); v != "" {
setDriveCount, err = strconv.Atoi(v)
if err != nil {
return nil, -1, -1, config.ErrInvalidErasureSetSize(err)
return nil, -1, config.ErrInvalidErasureSetSize(err)
}
}
if !ellipses.HasEllipses(args...) {
setArgs, err := GetAllSets(uint64(setDriveCount), args...)
if err != nil {
return nil, -1, -1, err
return nil, -1, err
}
endpointList, newSetupType, err := CreateEndpoints(serverAddr, false, setArgs...)
if err != nil {
return nil, -1, -1, err
return nil, -1, err
}
endpointZones = append(endpointZones, ZoneEndpoints{
SetCount: len(setArgs),
@ -358,7 +358,7 @@ func createServerEndpoints(serverAddr string, args ...string) (
Endpoints: endpointList,
})
setupType = newSetupType
return endpointZones, len(setArgs[0]), setupType, nil
return endpointZones, setupType, nil
}
var prevSetupType SetupType
@ -366,25 +366,25 @@ func createServerEndpoints(serverAddr string, args ...string) (
for _, arg := range args {
setArgs, err := GetAllSets(uint64(setDriveCount), arg)
if err != nil {
return nil, -1, -1, err
return nil, -1, err
}
var endpointList Endpoints
endpointList, setupType, err = CreateEndpoints(serverAddr, foundPrevLocal, setArgs...)
if err != nil {
return nil, -1, -1, err
return nil, -1, err
}
if setDriveCount != 0 && setDriveCount != len(setArgs[0]) {
return nil, -1, -1, fmt.Errorf("All zones should have same drive per set ratio - expected %d, got %d", setDriveCount, len(setArgs[0]))
return nil, -1, fmt.Errorf("All zones should have same drive per set ratio - expected %d, got %d", setDriveCount, len(setArgs[0]))
}
if prevSetupType != UnknownSetupType && prevSetupType != setupType {
return nil, -1, -1, fmt.Errorf("All zones should be of the same setup-type to maintain the original SLA expectations - expected %s, got %s", prevSetupType, setupType)
return nil, -1, fmt.Errorf("All zones should be of the same setup-type to maintain the original SLA expectations - expected %s, got %s", prevSetupType, setupType)
}
if err = endpointZones.Add(ZoneEndpoints{
SetCount: len(setArgs),
DrivesPerSet: len(setArgs[0]),
Endpoints: endpointList,
}); err != nil {
return nil, -1, -1, err
return nil, -1, err
}
foundPrevLocal = endpointList.atleastOneEndpointLocal()
if setDriveCount == 0 {
@ -393,5 +393,5 @@ func createServerEndpoints(serverAddr string, args ...string) (
prevSetupType = setupType
}
return endpointZones, setDriveCount, setupType, nil
return endpointZones, setupType, nil
}

View file

@ -56,7 +56,7 @@ func TestCreateServerEndpoints(t *testing.T) {
for _, testCase := range testCases {
testCase := testCase
t.Run("", func(t *testing.T) {
_, _, _, err := createServerEndpoints(testCase.serverAddr, testCase.args...)
_, _, err := createServerEndpoints(testCase.serverAddr, testCase.args...)
if err != nil && testCase.success {
t.Errorf("Expected success but failed instead %s", err)
}

View file

@ -371,6 +371,11 @@ func (s *erasureSets) NewNSLock(ctx context.Context, bucket string, objects ...s
return s.getHashedSet("").NewNSLock(ctx, bucket, objects...)
}
// SetDriveCount returns the current drives per set.
func (s *erasureSets) SetDriveCount() int {
return s.drivesPerSet
}
// StorageUsageInfo - combines output of StorageInfo across all erasure coded object sets.
// This only returns disk usage info for Zones to perform placement decision, this call
// is not implemented in Object interface and is not meant to be used by other object

View file

@ -87,6 +87,10 @@ func (z *erasureZones) NewNSLock(ctx context.Context, bucket string, objects ...
return z.zones[0].NewNSLock(ctx, bucket, objects...)
}
func (z *erasureZones) SetDriveCount() int {
return z.zones[0].SetDriveCount()
}
type zonesAvailableSpace []zoneAvailableSpace
type zoneAvailableSpace struct {

View file

@ -72,6 +72,11 @@ func (er erasureObjects) NewNSLock(ctx context.Context, bucket string, objects .
return er.nsMutex.NewNSLock(ctx, er.getLockers, bucket, objects...)
}
// SetDriveCount returns the current drives per set.
func (er erasureObjects) SetDriveCount() int {
return len(er.getDisks())
}
// Shutdown function for object storage interface.
func (er erasureObjects) Shutdown(ctx context.Context) error {
// Add any object layer shutdown activities here.

View file

@ -191,6 +191,11 @@ func (fs *FSObjects) NewNSLock(ctx context.Context, bucket string, objects ...st
return fs.nsMutex.NewNSLock(ctx, nil, bucket, objects...)
}
// SetDriveCount no-op
func (fs *FSObjects) SetDriveCount() int {
return 0
}
// Shutdown - should be called when process shuts down.
func (fs *FSObjects) Shutdown(ctx context.Context) error {
fs.fsFormatRlk.Close()

View file

@ -220,7 +220,7 @@ func StartGateway(ctx *cli.Context, gw Gateway) {
srvCfg := newServerConfig()
// Override any values from ENVs.
lookupConfigs(srvCfg)
lookupConfigs(srvCfg, 0)
// hold the mutex lock before a new config is assigned.
globalServerConfigMu.Lock()

View file

@ -46,6 +46,11 @@ func (a GatewayUnsupported) NewNSLock(ctx context.Context, bucket string, object
return nil
}
// SetDriveCount no-op
func (a GatewayUnsupported) SetDriveCount() int {
return 0
}
// ListMultipartUploads lists all multipart uploads.
func (a GatewayUnsupported) ListMultipartUploads(ctx context.Context, bucket string, prefix string, keyMarker string, uploadIDMarker string, delimiter string, maxUploads int) (lmi ListMultipartsInfo, err error) {
return lmi, NotImplemented{}

View file

@ -108,9 +108,6 @@ var globalCLIContext = struct {
}{}
var (
// Indicates set drive count.
globalErasureSetDriveCount int
// Indicates if the running minio server is distributed setup.
globalIsDistErasure = false

View file

@ -236,6 +236,11 @@ func getLongLivedLocks(interval time.Duration) map[Endpoint][]nameLockRequesterI
//
// We will ignore the error, and we will retry later to get a resolve on this lock
func lockMaintenance(ctx context.Context, interval time.Duration) error {
objAPI := newObjectLayerWithoutSafeModeFn()
if objAPI == nil {
return nil
}
// Validate if long lived locks are indeed clean.
// Get list of long lived locks to check for staleness.
for lendpoint, nlrips := range getLongLivedLocks(interval) {
@ -275,10 +280,10 @@ func lockMaintenance(ctx context.Context, interval time.Duration) error {
}
// Read locks we assume quorum for be N/2 success
quorum := globalErasureSetDriveCount / 2
quorum := objAPI.SetDriveCount() / 2
if nlrip.lri.Writer {
// For write locks we need N/2+1 success
quorum = globalErasureSetDriveCount/2 + 1
quorum = objAPI.SetDriveCount()/2 + 1
}
// less than the quorum, we have locks expired.

View file

@ -64,6 +64,8 @@ const (
// ObjectLayer implements primitives for object API layer.
type ObjectLayer interface {
SetDriveCount() int // Only implemented by erasure layer
// Locking operations on object.
NewNSLock(ctx context.Context, bucket string, objects ...string) RWLocker

View file

@ -114,9 +114,9 @@ func serverHandleCmdArgs(ctx *cli.Context) {
globalMinioHost, globalMinioPort = mustSplitHostPort(globalMinioAddr)
endpoints := strings.Fields(env.Get(config.EnvEndpoints, ""))
if len(endpoints) > 0 {
globalEndpoints, globalErasureSetDriveCount, setupType, err = createServerEndpoints(globalCLIContext.Addr, endpoints...)
globalEndpoints, setupType, err = createServerEndpoints(globalCLIContext.Addr, endpoints...)
} else {
globalEndpoints, globalErasureSetDriveCount, setupType, err = createServerEndpoints(globalCLIContext.Addr, ctx.Args()...)
globalEndpoints, setupType, err = createServerEndpoints(globalCLIContext.Addr, ctx.Args()...)
}
logger.FatalIf(err, "Invalid command line arguments")

View file

@ -451,7 +451,7 @@ func newStorageRESTHTTPServerClient(t *testing.T) (*httptest.Server, *storageRES
prevGlobalServerConfig := globalServerConfig
globalServerConfig = newServerConfig()
lookupConfigs(globalServerConfig)
lookupConfigs(globalServerConfig, 0)
restClient := newStorageRESTClient(endpoint)

View file

@ -249,7 +249,7 @@ func newXLStorage(path string, hostname string) (*xlStorage, error) {
return &b
},
},
globalSync: env.Get(config.EnvFSOSync, config.EnableOn) == config.EnableOn,
globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn,
diskMount: mountinfo.IsLikelyMountPoint(path),
// Allow disk usage crawler to run with up to 2 concurrent
// I/O ops, if and when activeIOCount reaches this
@ -2082,10 +2082,7 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s
return osErrToFileErr(err)
}
for _, entry := range entries {
if entry == xlStorageFormatFile {
continue
}
if strings.HasSuffix(entry, slashSeparator) {
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
continue
}
if strings.HasPrefix(entry, "part.") {
@ -2102,6 +2099,7 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s
if err != nil {
return osErrToFileErr(err)
}
legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir)
// legacy data dir means its old content, honor system umask.
if err = os.MkdirAll(legacyDataPath, 0777); err != nil {
@ -2117,7 +2115,8 @@ func (s *xlStorage) RenameData(srcVolume, srcPath, dataDir, dstVolume, dstPath s
}
for _, entry := range entries {
if entry == xlStorageFormatFile {
// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
if entry == xlStorageFormatFile || strings.HasPrefix(entry, SlashSeparator) {
continue
}