calculate automatic node based symmetry (#9446)

it is possible in many screnarios that even
if the divisible value is optimal, we may
end up with uneven distribution due to number
of nodes present in the configuration.

added code allow for affinity towards various
ellipses to figure out optimal value across
ellipses such that we can always reach a
symmetric value automatically.

Fixes #9416
This commit is contained in:
Harshavardhana 2020-04-27 14:39:57 -07:00 committed by GitHub
parent 97d952e61c
commit bc61417284
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 145 additions and 34 deletions

View file

@ -117,7 +117,7 @@ func initTestXLObjLayer(ctx context.Context) (ObjectLayer, []string, error) {
}
globalPolicySys = NewPolicySys()
objLayer, err := newXLSets(ctx, endpoints, storageDisks, format, 1, 16)
objLayer, err := newXLSets(ctx, endpoints, storageDisks, format)
if err != nil {
return nil, nil, err
}

View file

@ -18,6 +18,7 @@ package cmd
import (
"fmt"
"sort"
"strconv"
"strings"
@ -63,11 +64,72 @@ var isValidSetSize = func(count uint64) bool {
return (count >= setSizes[0] && count <= setSizes[len(setSizes)-1])
}
func commonSetDriveCount(divisibleSize uint64, setCounts []uint64) (setSize uint64) {
// prefers setCounts to be sorted for optimal behavior.
if divisibleSize < setCounts[len(setCounts)-1] {
return divisibleSize
}
// Figure out largest value of total_drives_in_erasure_set which results
// in least number of total_drives/total_drives_erasure_set ratio.
prevD := divisibleSize / setCounts[0]
for _, cnt := range setCounts {
if divisibleSize%cnt == 0 {
d := divisibleSize / cnt
if d <= prevD {
prevD = d
setSize = cnt
}
}
}
return setSize
}
// possibleSetCountsWithSymmetry returns symmetrical setCounts based on the
// input argument patterns, the symmetry calculation is to ensure that
// we also use uniform number of drives common across all ellipses patterns.
func possibleSetCountsWithSymmetry(setCounts []uint64, argPatterns []ellipses.ArgPattern) []uint64 {
var newSetCounts = make(map[uint64]struct{})
for _, ss := range setCounts {
var symmetry bool
for _, argPattern := range argPatterns {
for _, p := range argPattern {
if uint64(len(p.Seq)) > ss {
symmetry = uint64(len(p.Seq))%ss == 0
} else {
symmetry = ss%uint64(len(p.Seq)) == 0
}
}
}
// With no arg patterns, it is expected that user knows
// the right symmetry, so either ellipses patterns are
// provided (recommended) or no ellipses patterns.
if _, ok := newSetCounts[ss]; !ok && (symmetry || argPatterns == nil) {
newSetCounts[ss] = struct{}{}
}
}
setCounts = []uint64{}
for setCount := range newSetCounts {
setCounts = append(setCounts, setCount)
}
// Not necessarily needed but it ensures to the readers
// eyes that we prefer a sorted setCount slice for the
// subsequent function to figure out the right common
// divisor, it avoids loops.
sort.Slice(setCounts, func(i, j int) bool {
return setCounts[i] < setCounts[j]
})
return setCounts
}
// getSetIndexes returns list of indexes which provides the set size
// on each index, this function also determines the final set size
// The final set size has the affinity towards choosing smaller
// indexes (total sets)
func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint64) (setIndexes [][]uint64, err error) {
func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint64, argPatterns []ellipses.ArgPattern) (setIndexes [][]uint64, err error) {
if len(totalSizes) == 0 || len(args) == 0 {
return nil, errInvalidArgument
}
@ -81,24 +143,7 @@ func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint6
}
}
var setSize uint64
commonSize := getDivisibleSize(totalSizes)
if commonSize > setSizes[len(setSizes)-1] {
prevD := commonSize / setSizes[0]
for _, i := range setSizes {
if commonSize%i == 0 {
d := commonSize / i
if d <= prevD {
prevD = d
setSize = i
}
}
}
} else {
setSize = commonSize
}
possibleSetCounts := func(setSize uint64) (ss []uint64) {
for _, s := range setSizes {
if setSize%s == 0 {
@ -114,11 +159,11 @@ func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint6
return nil, config.ErrInvalidNumberOfErasureEndpoints(nil).Msg(msg)
}
var setSize uint64
// Custom set drive count allows to override automatic distribution.
// only meant if you want to further optimize drive distribution.
if customSetDriveCount > 0 {
msg := fmt.Sprintf("Invalid set drive count. Acceptable values for %d number drives are %d", commonSize, setCounts)
if customSetDriveCount > setSize {
return nil, config.ErrInvalidErasureSetSize(nil).Msg(msg)
}
var found bool
for _, ss := range setCounts {
if ss == customSetDriveCount {
@ -128,7 +173,16 @@ func getSetIndexes(args []string, totalSizes []uint64, customSetDriveCount uint6
if !found {
return nil, config.ErrInvalidErasureSetSize(nil).Msg(msg)
}
// No automatic symmetry calculation expected, user is on their own
setSize = customSetDriveCount
globalCustomErasureDriveCount = true
} else {
// Returns possible set counts with symmetry.
setCounts = possibleSetCountsWithSymmetry(setCounts, argPatterns)
// Final set size with all the symmetry accounted for.
setSize = commonSetDriveCount(commonSize, setCounts)
}
// Check whether setSize is with the supported range.
@ -202,7 +256,7 @@ func parseEndpointSet(customSetDriveCount uint64, args ...string) (ep endpointSe
argPatterns[i] = patterns
}
ep.setIndexes, err = getSetIndexes(args, getTotalSizes(argPatterns), customSetDriveCount)
ep.setIndexes, err = getSetIndexes(args, getTotalSizes(argPatterns), customSetDriveCount, argPatterns)
if err != nil {
return endpointSet{}, config.ErrInvalidErasureEndpoints(nil).Msg(err.Error())
}
@ -224,7 +278,7 @@ func GetAllSets(customSetDriveCount uint64, args ...string) ([][]string, error)
// Check if we have more one args.
if len(args) > 1 {
var err error
setIndexes, err = getSetIndexes(args, []uint64{uint64(len(args))}, customSetDriveCount)
setIndexes, err = getSetIndexes(args, []uint64{uint64(len(args))}, customSetDriveCount, nil)
if err != nil {
return nil, err
}
@ -258,6 +312,15 @@ func GetAllSets(customSetDriveCount uint64, args ...string) ([][]string, error)
return setArgs, nil
}
// Override set drive count for manual distribution.
const (
EnvErasureSetDriveCount = "MINIO_ERASURE_SET_DRIVE_COUNT"
)
var (
globalCustomErasureDriveCount = false
)
// CreateServerEndpoints - validates and creates new endpoints from input args, supports
// both ellipses and without ellipses transparently.
func createServerEndpoints(serverAddr string, args ...string) (
@ -268,7 +331,7 @@ func createServerEndpoints(serverAddr string, args ...string) (
return nil, -1, -1, errInvalidArgument
}
if v := env.Get("MINIO_ERASURE_SET_DRIVE_COUNT", ""); v != "" {
if v := env.Get(EnvErasureSetDriveCount, ""); v != "" {
setDriveCount, err = strconv.Atoi(v)
if err != nil {
return nil, -1, -1, config.ErrInvalidErasureSetSize(err)

View file

@ -104,11 +104,18 @@ func TestGetSetIndexesEnvOverride(t *testing.T) {
8,
true,
},
{
[]string{"http://host{1...2}/data{1...180}"},
[]uint64{360},
[][]uint64{{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}},
15,
true,
},
{
[]string{"http://host{1...12}/data{1...12}"},
[]uint64{144},
[][]uint64{{16, 16, 16, 16, 16, 16, 16, 16, 16}},
16,
[][]uint64{{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}},
12,
true,
},
{
@ -160,7 +167,16 @@ func TestGetSetIndexesEnvOverride(t *testing.T) {
for _, testCase := range testCases {
testCase := testCase
t.Run("", func(t *testing.T) {
gotIndexes, err := getSetIndexes(testCase.args, testCase.totalSizes, testCase.envOverride)
var argPatterns = make([]ellipses.ArgPattern, len(testCase.args))
for i, arg := range testCase.args {
patterns, err := ellipses.FindEllipsesPatterns(arg)
if err != nil {
t.Fatalf("Unexpected failure %s", err)
}
argPatterns[i] = patterns
}
gotIndexes, err := getSetIndexes(testCase.args, testCase.totalSizes, testCase.envOverride, argPatterns)
if err != nil && testCase.success {
t.Errorf("Expected success but failed instead %s", err)
}
@ -202,6 +218,24 @@ func TestGetSetIndexes(t *testing.T) {
[][]uint64{{9, 9, 9}},
true,
},
{
[]string{"http://host{1...3}/data{1...180}"},
[]uint64{540},
[][]uint64{{15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}},
true,
},
{
[]string{"http://host{1...2}.rack{1...4}/data{1...180}"},
[]uint64{1440},
[][]uint64{{16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}},
true,
},
{
[]string{"http://host{1...2}/data{1...180}"},
[]uint64{360},
[][]uint64{{12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12}},
true,
},
{
[]string{"data/controller1/export{1...4}, data/controller2/export{1...8}, data/controller3/export{1...12}"},
[]uint64{4, 8, 12},
@ -243,7 +277,15 @@ func TestGetSetIndexes(t *testing.T) {
for _, testCase := range testCases {
testCase := testCase
t.Run("", func(t *testing.T) {
gotIndexes, err := getSetIndexes(testCase.args, testCase.totalSizes, 0)
var argPatterns = make([]ellipses.ArgPattern, len(testCase.args))
for i, arg := range testCase.args {
patterns, err := ellipses.FindEllipsesPatterns(arg)
if err != nil {
t.Fatalf("Unexpected failure %s", err)
}
argPatterns[i] = patterns
}
gotIndexes, err := getSetIndexes(testCase.args, testCase.totalSizes, 0, argPatterns)
if err != nil && testCase.success {
t.Errorf("Expected success but failed instead %s", err)
}

View file

@ -456,7 +456,10 @@ func checkFormatXLValues(formats []*formatXLV3, drivesPerSet int) error {
return fmt.Errorf("%s disk is already being used in another erasure deployment. (Number of disks specified: %d but the number of disks found in the %s disk's format.json: %d)",
humanize.Ordinal(i+1), len(formats), humanize.Ordinal(i+1), len(formatXL.XL.Sets)*len(formatXL.XL.Sets[0]))
}
if len(formatXL.XL.Sets[0]) != drivesPerSet {
// Only if custom erasure drive count is set,
// we should fail here other proceed to honor what
// is present on the disk.
if globalCustomErasureDriveCount && len(formatXL.XL.Sets[0]) != drivesPerSet {
return fmt.Errorf("%s disk is already formatted with %d drives per erasure set. This cannot be changed to %d, please revert your MINIO_ERASURE_SET_DRIVE_COUNT setting", humanize.Ordinal(i+1), len(formatXL.XL.Sets[0]), drivesPerSet)
}
}

View file

@ -182,7 +182,7 @@ func prepareXLSets32(ctx context.Context) (ObjectLayer, []string, error) {
return nil, nil, err
}
objAPI, err := newXLSets(ctx, endpoints, storageDisks, format, 2, 16)
objAPI, err := newXLSets(ctx, endpoints, storageDisks, format)
if err != nil {
return nil, nil, err
}

View file

@ -293,7 +293,7 @@ func (s *xlSets) GetDisks(setIndex int) func() []StorageAPI {
const defaultMonitorConnectEndpointInterval = time.Second * 10 // Set to 10 secs.
// Initialize new set of erasure coded sets.
func newXLSets(ctx context.Context, endpoints Endpoints, storageDisks []StorageAPI, format *formatXLV3, setCount int, drivesPerSet int) (*xlSets, error) {
func newXLSets(ctx context.Context, endpoints Endpoints, storageDisks []StorageAPI, format *formatXLV3) (*xlSets, error) {
endpointStrings := make([]string, len(endpoints))
for i, endpoint := range endpoints {
if endpoint.IsLocal {
@ -303,6 +303,9 @@ func newXLSets(ctx context.Context, endpoints Endpoints, storageDisks []StorageA
}
}
setCount := len(format.XL.Sets)
drivesPerSet := len(format.XL.Sets[0])
// Initialize the XL sets instance.
s := &xlSets{
sets: make([]*xlObjects, setCount),

View file

@ -96,7 +96,7 @@ func TestNewXLSets(t *testing.T) {
t.Fatalf("Unable to format disks for erasure, %s", err)
}
if _, err := newXLSets(ctx, endpoints, storageDisks, format, 1, 16); err != nil {
if _, err := newXLSets(ctx, endpoints, storageDisks, format); err != nil {
t.Fatalf("Unable to initialize erasure")
}
}

View file

@ -82,7 +82,7 @@ func newXLZones(ctx context.Context, endpointZones EndpointZones) (ObjectLayer,
if deploymentID == "" {
deploymentID = formats[i].ID
}
z.zones[i], err = newXLSets(ctx, ep.Endpoints, storageDisks[i], formats[i], ep.SetCount, ep.DrivesPerSet)
z.zones[i], err = newXLSets(ctx, ep.Endpoints, storageDisks[i], formats[i])
if err != nil {
return nil, err
}