From 3831cc9e3b36cb84a29ad70f97db30a549f238ea Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 21 Sep 2020 01:18:13 -0700 Subject: [PATCH] fix: [fs] CompleteMultipart use trie structure for partMatch (#10522) performance improves by around 100x or more ``` go test -v -run NONE -bench BenchmarkGetPartFile goos: linux goarch: amd64 pkg: github.com/minio/minio/cmd BenchmarkGetPartFileWithTrie BenchmarkGetPartFileWithTrie-4 1000000000 0.140 ns/op 0 B/op 0 allocs/op PASS ok github.com/minio/minio/cmd 1.737s ``` fixes #10520 --- cmd/fs-v1-multipart.go | 30 +++++++++++++++-------- cmd/main.go | 10 ++++---- cmd/object-api-utils.go | 12 +++++----- cmd/object-api-utils_test.go | 46 ++++++++++++------------------------ pkg/trie/trie.go | 14 +++++------ 5 files changed, 52 insertions(+), 60 deletions(-) diff --git a/cmd/fs-v1-multipart.go b/cmd/fs-v1-multipart.go index 00d1617fa..dd56d2253 100644 --- a/cmd/fs-v1-multipart.go +++ b/cmd/fs-v1-multipart.go @@ -32,6 +32,7 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/minio/minio/cmd/logger" mioutil "github.com/minio/minio/pkg/ioutil" + "github.com/minio/minio/pkg/trie" ) // Returns EXPORT/.minio.sys/multipart/SHA256/UPLOADID @@ -577,7 +578,10 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string, // Calculate s3 compatible md5sum for complete multipart. s3MD5 := getCompleteMultipartMD5(parts) - partSize := int64(-1) // Used later to ensure that all parts sizes are same. + // ensure that part ETag is canonicalized to strip off extraneous quotes + for i := range parts { + parts[i].ETag = canonicalizeETag(parts[i].ETag) + } fsMeta := fsMetaV1{} @@ -591,16 +595,17 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string, return oi, err } - // ensure that part ETag is canonicalized to strip off extraneous quotes - for i := range parts { - parts[i].ETag = canonicalizeETag(parts[i].ETag) + // Create entries trie structure for prefix match + entriesTrie := trie.NewTrie() + for _, entry := range entries { + entriesTrie.Insert(entry) } // Save consolidated actual size. var objectActualSize int64 // Validate all parts and then commit to disk. for i, part := range parts { - partFile := getPartFile(entries, part.PartNumber, part.ETag) + partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag) if partFile == "" { return oi, InvalidPart{ PartNumber: part.PartNumber, @@ -628,9 +633,6 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string, } return oi, err } - if partSize == -1 { - partSize = actualSize - } fsMeta.Parts[i] = ObjectPartInfo{ Number: part.PartNumber, @@ -695,8 +697,16 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string, fsRemoveFile(ctx, file.filePath) } for _, part := range parts { - partPath := getPartFile(entries, part.PartNumber, part.ETag) - if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partPath), globalFSOSync); err != nil { + partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag) + if partFile == "" { + logger.LogIf(ctx, fmt.Errorf("%.5d.%s missing will not proceed", + part.PartNumber, part.ETag)) + return oi, InvalidPart{ + PartNumber: part.PartNumber, + GotETag: part.ETag, + } + } + if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partFile), globalFSOSync); err != nil { logger.LogIf(ctx, err) return oi, toObjectErr(err) } diff --git a/cmd/main.go b/cmd/main.go index 0661faaa9..4565ba89e 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -102,21 +102,19 @@ func newApp(name string) *cli.App { findClosestCommands := func(command string) []string { var closestCommands []string - for _, value := range commandsTree.PrefixMatch(command) { - closestCommands = append(closestCommands, value.(string)) - } + closestCommands = append(closestCommands, commandsTree.PrefixMatch(command)...) sort.Strings(closestCommands) // Suggest other close commands - allow missed, wrongly added and // even transposed characters for _, value := range commandsTree.Walk(commandsTree.Root()) { - if sort.SearchStrings(closestCommands, value.(string)) < len(closestCommands) { + if sort.SearchStrings(closestCommands, value) < len(closestCommands) { continue } // 2 is arbitrary and represents the max // allowed number of typed errors - if words.DamerauLevenshteinDistance(command, value.(string)) < 2 { - closestCommands = append(closestCommands, value.(string)) + if words.DamerauLevenshteinDistance(command, value) < 2 { + closestCommands = append(closestCommands, value) } } diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index fdb09c334..88893fa65 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -45,6 +45,7 @@ import ( "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/hash" "github.com/minio/minio/pkg/ioutil" + "github.com/minio/minio/pkg/trie" "github.com/minio/minio/pkg/wildcard" ) @@ -487,13 +488,12 @@ func hasPattern(patterns []string, matchStr string) bool { } // Returns the part file name which matches the partNumber and etag. -func getPartFile(entries []string, partNumber int, etag string) string { - for _, entry := range entries { - if strings.HasPrefix(entry, fmt.Sprintf("%.5d.%s.", partNumber, etag)) { - return entry - } +func getPartFile(entriesTrie *trie.Trie, partNumber int, etag string) (partFile string) { + for _, match := range entriesTrie.PrefixMatch(fmt.Sprintf("%.5d.%s.", partNumber, etag)) { + partFile = match + break } - return "" + return partFile } // Returns the compressed offset which should be skipped. diff --git a/cmd/object-api-utils_test.go b/cmd/object-api-utils_test.go index 9a98e4bfe..bbb7fe582 100644 --- a/cmd/object-api-utils_test.go +++ b/cmd/object-api-utils_test.go @@ -18,6 +18,7 @@ package cmd import ( "bytes" + "fmt" "io" "net/http" "reflect" @@ -27,6 +28,7 @@ import ( "github.com/klauspost/compress/s2" "github.com/minio/minio/cmd/config/compress" "github.com/minio/minio/cmd/crypto" + "github.com/minio/minio/pkg/trie" ) // Tests validate bucket name. @@ -440,40 +442,22 @@ func TestExcludeForCompression(t *testing.T) { } } -// Test getPartFile function. -func TestGetPartFile(t *testing.T) { - testCases := []struct { - entries []string - partNumber int - etag string - result string - }{ - { - entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"}, - partNumber: 1, - etag: "8a034f82cb9cb31140d87d3ce2a9ede3", - result: "00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", - }, - { - entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"}, - partNumber: 2, - etag: "d73d8ab724016dfb051e2d3584495c54", - result: "00002.d73d8ab724016dfb051e2d3584495c54.32891137", - }, - { - entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"}, - partNumber: 1, - etag: "d73d8ab724016dfb051e2d3584495c54", - result: "", - }, +func BenchmarkGetPartFileWithTrie(b *testing.B) { + b.ResetTimer() + + entriesTrie := trie.NewTrie() + for i := 1; i <= 10000; i++ { + entriesTrie.Insert(fmt.Sprintf("%.5d.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", i)) } - for i, test := range testCases { - got := getPartFile(test.entries, test.partNumber, test.etag) - if got != test.result { - t.Errorf("Test %d - expected %s but received %s", - i+1, test.result, got) + + for i := 1; i <= 10000; i++ { + partFile := getPartFile(entriesTrie, i, "8a034f82cb9cb31140d87d3ce2a9ede3") + if partFile == "" { + b.Fatal("partFile returned is empty") } } + + b.ReportAllocs() } func TestGetActualSize(t *testing.T) { diff --git a/pkg/trie/trie.go b/pkg/trie/trie.go index d41d4d38a..5c63e4ec0 100644 --- a/pkg/trie/trie.go +++ b/pkg/trie/trie.go @@ -21,7 +21,7 @@ package trie // Node trie tree node container carries value and children. type Node struct { exists bool - value interface{} + value string child map[rune]*Node // runes as child. } @@ -29,7 +29,7 @@ type Node struct { func newNode() *Node { return &Node{ exists: false, - value: nil, + value: "", child: make(map[rune]*Node), } } @@ -65,16 +65,16 @@ func (t *Trie) Insert(key string) { } // PrefixMatch - prefix match. -func (t *Trie) PrefixMatch(key string) []interface{} { +func (t *Trie) PrefixMatch(key string) []string { node, _ := t.findNode(key) - if node != nil { - return t.Walk(node) + if node == nil { + return nil } - return []interface{}{} + return t.Walk(node) } // Walk the tree. -func (t *Trie) Walk(node *Node) (ret []interface{}) { +func (t *Trie) Walk(node *Node) (ret []string) { if node.exists { ret = append(ret, node.value) }