fix: [fs] CompleteMultipart use trie structure for partMatch (#10522)

performance improves by around 100x or more

```
go test -v -run NONE -bench BenchmarkGetPartFile
goos: linux
goarch: amd64
pkg: github.com/minio/minio/cmd
BenchmarkGetPartFileWithTrie
BenchmarkGetPartFileWithTrie-4          1000000000               0.140 ns/op           0 B/op          0 allocs/op
PASS
ok      github.com/minio/minio/cmd      1.737s
```

fixes #10520
Harshavardhana 2020-09-21 01:18:13 -07:00 committed by GitHub
parent 230fc0d186
commit 3831cc9e3b
5 changed files with 52 additions and 60 deletions
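
The core of the change: part files in the upload directory are named with a zero-padded part number, the part ETag, and a size suffix (e.g. `00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864`). Previously every part lookup scanned all directory entries with `strings.HasPrefix`; now the entries are inserted into a trie once per `CompleteMultipartUpload` and each part is resolved by a prefix match, so lookup cost no longer grows with the number of entries. A minimal standalone sketch of the pattern, using only the `pkg/trie` API that appears in this diff (`NewTrie`, `Insert`, `PrefixMatch`):

```go
package main

import (
	"fmt"

	"github.com/minio/minio/pkg/trie"
)

func main() {
	// Directory entries as they would appear under the upload directory
	// (names taken from this commit's test data).
	entries := []string{
		"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864",
		"fs.json",
		"00002.d73d8ab724016dfb051e2d3584495c54.32891137",
	}

	// Build the trie once...
	entriesTrie := trie.NewTrie()
	for _, entry := range entries {
		entriesTrie.Insert(entry)
	}

	// ...then each part lookup walks only its key prefix instead of
	// scanning every entry.
	prefix := fmt.Sprintf("%.5d.%s.", 2, "d73d8ab724016dfb051e2d3584495c54")
	fmt.Println(entriesTrie.PrefixMatch(prefix))
	// [00002.d73d8ab724016dfb051e2d3584495c54.32891137]
}
```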


@@ -32,6 +32,7 @@ import (
jsoniter "github.com/json-iterator/go"
"github.com/minio/minio/cmd/logger"
mioutil "github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/trie"
)
// Returns EXPORT/.minio.sys/multipart/SHA256/UPLOADID
@@ -577,7 +578,10 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
// Calculate s3 compatible md5sum for complete multipart.
s3MD5 := getCompleteMultipartMD5(parts)
partSize := int64(-1) // Used later to ensure that all parts sizes are same.
// ensure that part ETag is canonicalized to strip off extraneous quotes
for i := range parts {
parts[i].ETag = canonicalizeETag(parts[i].ETag)
}
fsMeta := fsMetaV1{}
@@ -591,16 +595,17 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
return oi, err
}
// ensure that part ETag is canonicalized to strip off extraneous quotes
for i := range parts {
parts[i].ETag = canonicalizeETag(parts[i].ETag)
// Create entries trie structure for prefix match
entriesTrie := trie.NewTrie()
for _, entry := range entries {
entriesTrie.Insert(entry)
}
// Save consolidated actual size.
var objectActualSize int64
// Validate all parts and then commit to disk.
for i, part := range parts {
partFile := getPartFile(entries, part.PartNumber, part.ETag)
partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag)
if partFile == "" {
return oi, InvalidPart{
PartNumber: part.PartNumber,
@@ -628,9 +633,6 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
}
return oi, err
}
if partSize == -1 {
partSize = actualSize
}
fsMeta.Parts[i] = ObjectPartInfo{
Number: part.PartNumber,
@@ -695,8 +697,16 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
fsRemoveFile(ctx, file.filePath)
}
for _, part := range parts {
partPath := getPartFile(entries, part.PartNumber, part.ETag)
if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partPath), globalFSOSync); err != nil {
partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag)
if partFile == "" {
logger.LogIf(ctx, fmt.Errorf("%.5d.%s missing will not proceed",
part.PartNumber, part.ETag))
return oi, InvalidPart{
PartNumber: part.PartNumber,
GotETag: part.ETag,
}
}
if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partFile), globalFSOSync); err != nil {
logger.LogIf(ctx, err)
return oi, toObjectErr(err)
}
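
Two details in the hunks above are easy to miss: part ETags are canonicalized before they are used to build lookup keys (the loop moved ahead of reading the upload metadata), and the append loop now fails fast with `InvalidPart` when the trie lookup comes up empty, instead of letting `AppendFile` fail on a path built from an empty match. Canonicalization matters because clients commonly send quoted ETags while on-disk part names carry the bare ETag. A hedged illustration — `canonicalizeETagSketch` below is a stand-in written from the comment in the diff, not MinIO's actual `canonicalizeETag` implementation:

```go
package main

import (
	"fmt"
	"strings"
)

// canonicalizeETagSketch is an illustrative stand-in (an assumption, not
// MinIO's actual canonicalizeETag): it strips surrounding quotes so a
// client-supplied ETag matches the bare ETag used in on-disk part names.
func canonicalizeETagSketch(etag string) string {
	return strings.Trim(etag, `"`)
}

func main() {
	clientETag := `"d73d8ab724016dfb051e2d3584495c54"` // as sent by many S3 clients
	etag := canonicalizeETagSketch(clientETag)

	// Lookup key for the trie prefix match; with the quotes left in place
	// no part file would ever match.
	fmt.Printf("%.5d.%s.\n", 2, etag) // 00002.d73d8ab724016dfb051e2d3584495c54.
}
```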


@@ -102,21 +102,19 @@ func newApp(name string) *cli.App {
findClosestCommands := func(command string) []string {
var closestCommands []string
for _, value := range commandsTree.PrefixMatch(command) {
closestCommands = append(closestCommands, value.(string))
}
closestCommands = append(closestCommands, commandsTree.PrefixMatch(command)...)
sort.Strings(closestCommands)
// Suggest other close commands - allow missed, wrongly added and
// even transposed characters
for _, value := range commandsTree.Walk(commandsTree.Root()) {
if sort.SearchStrings(closestCommands, value.(string)) < len(closestCommands) {
if sort.SearchStrings(closestCommands, value) < len(closestCommands) {
continue
}
// 2 is arbitrary and represents the max
// allowed number of typed errors
if words.DamerauLevenshteinDistance(command, value.(string)) < 2 {
closestCommands = append(closestCommands, value.(string))
if words.DamerauLevenshteinDistance(command, value) < 2 {
closestCommands = append(closestCommands, value)
}
}
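
With `PrefixMatch` and `Walk` now returning `[]string`, the command-suggestion code above drops its `value.(string)` assertions. A self-contained sketch of the same pattern — the command names are made-up examples, `findClosestCommandsSketch` is illustrative rather than MinIO's actual helper, and the `pkg/words` import path is assumed from the identifier in the diff:

```go
package main

import (
	"fmt"
	"sort"

	"github.com/minio/minio/pkg/trie"
	"github.com/minio/minio/pkg/words"
)

// findClosestCommandsSketch mirrors the logic in the hunk above: prefix
// matches first, then any command within a Damerau-Levenshtein distance
// of 2 to allow for small typos.
func findClosestCommandsSketch(commandsTree *trie.Trie, command string) []string {
	closestCommands := commandsTree.PrefixMatch(command)
	sort.Strings(closestCommands)
	for _, value := range commandsTree.Walk(commandsTree.Root()) {
		if sort.SearchStrings(closestCommands, value) < len(closestCommands) {
			continue
		}
		if words.DamerauLevenshteinDistance(command, value) < 2 {
			closestCommands = append(closestCommands, value)
		}
	}
	return closestCommands
}

func main() {
	commandsTree := trie.NewTrie()
	for _, name := range []string{"server", "gateway", "version"} { // example names only
		commandsTree.Insert(name)
	}
	fmt.Println(findClosestCommandsSketch(commandsTree, "serve")) // [server]
}
```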


@@ -45,6 +45,7 @@ import (
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/hash"
"github.com/minio/minio/pkg/ioutil"
"github.com/minio/minio/pkg/trie"
"github.com/minio/minio/pkg/wildcard"
)
@@ -487,13 +488,12 @@ func hasPattern(patterns []string, matchStr string) bool {
}
// Returns the part file name which matches the partNumber and etag.
func getPartFile(entries []string, partNumber int, etag string) string {
for _, entry := range entries {
if strings.HasPrefix(entry, fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
return entry
}
func getPartFile(entriesTrie *trie.Trie, partNumber int, etag string) (partFile string) {
for _, match := range entriesTrie.PrefixMatch(fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
partFile = match
break
}
return ""
return partFile
}
// Returns the compressed offset which should be skipped.
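
Note the trailing `.` in the `%.5d.%s.` lookup prefix: it ensures the stored ETag segment is matched in full, so an entry whose ETag merely begins with the requested value will not match. A sketch of the removed table-driven cases re-expressed against the new signature (not part of the commit; the entry names come from the commit's own test data):

```go
package cmd

import (
	"testing"

	"github.com/minio/minio/pkg/trie"
)

// TestGetPartFileWithTrie re-expresses the removed table-driven cases
// against the new trie-based signature (a sketch, not part of the commit).
func TestGetPartFileWithTrie(t *testing.T) {
	entriesTrie := trie.NewTrie()
	for _, entry := range []string{
		"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864",
		"fs.json",
		"00002.d73d8ab724016dfb051e2d3584495c54.32891137",
	} {
		entriesTrie.Insert(entry)
	}

	testCases := []struct {
		partNumber int
		etag       string
		result     string
	}{
		{1, "8a034f82cb9cb31140d87d3ce2a9ede3", "00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864"},
		{2, "d73d8ab724016dfb051e2d3584495c54", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
		// A wrong part number/ETag pairing must return the empty string.
		{1, "d73d8ab724016dfb051e2d3584495c54", ""},
	}
	for i, test := range testCases {
		if got := getPartFile(entriesTrie, test.partNumber, test.etag); got != test.result {
			t.Errorf("Test %d - expected %q but received %q", i+1, test.result, got)
		}
	}
}
```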


@@ -18,6 +18,7 @@ package cmd
import (
"bytes"
"fmt"
"io"
"net/http"
"reflect"
@@ -27,6 +28,7 @@ import (
"github.com/klauspost/compress/s2"
"github.com/minio/minio/cmd/config/compress"
"github.com/minio/minio/cmd/crypto"
"github.com/minio/minio/pkg/trie"
)
// Tests validate bucket name.
@@ -440,40 +442,22 @@ func TestExcludeForCompression(t *testing.T) {
}
}
// Test getPartFile function.
func TestGetPartFile(t *testing.T) {
testCases := []struct {
entries []string
partNumber int
etag string
result string
}{
{
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
partNumber: 1,
etag: "8a034f82cb9cb31140d87d3ce2a9ede3",
result: "00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864",
},
{
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
partNumber: 2,
etag: "d73d8ab724016dfb051e2d3584495c54",
result: "00002.d73d8ab724016dfb051e2d3584495c54.32891137",
},
{
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
partNumber: 1,
etag: "d73d8ab724016dfb051e2d3584495c54",
result: "",
},
func BenchmarkGetPartFileWithTrie(b *testing.B) {
b.ResetTimer()
entriesTrie := trie.NewTrie()
for i := 1; i <= 10000; i++ {
entriesTrie.Insert(fmt.Sprintf("%.5d.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", i))
}
for i, test := range testCases {
got := getPartFile(test.entries, test.partNumber, test.etag)
if got != test.result {
t.Errorf("Test %d - expected %s but received %s",
i+1, test.result, got)
for i := 1; i <= 10000; i++ {
partFile := getPartFile(entriesTrie, i, "8a034f82cb9cb31140d87d3ce2a9ede3")
if partFile == "" {
b.Fatal("partFile returned is empty")
}
}
b.ReportAllocs()
}
func TestGetActualSize(t *testing.T) {
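
The hunk above replaces the old table-driven `TestGetPartFile` with `BenchmarkGetPartFileWithTrie`. As committed, the benchmark body performs a fixed 10,000 lookups regardless of `b.N`, so the reported ns/op spreads that fixed work over `b.N` iterations; a `b.N`-driven variant (a sketch only, not part of the commit) would time exactly one lookup per iteration:

```go
package cmd

import (
	"fmt"
	"testing"

	"github.com/minio/minio/pkg/trie"
)

// BenchmarkGetPartFileTrieBN is a b.N-driven sketch: the trie is built
// outside the timed region and each iteration performs exactly one
// lookup, so ns/op reflects a single getPartFile call.
func BenchmarkGetPartFileTrieBN(b *testing.B) {
	entriesTrie := trie.NewTrie()
	for i := 1; i <= 10000; i++ {
		entriesTrie.Insert(fmt.Sprintf("%.5d.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", i))
	}
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		partNumber := (i % 10000) + 1 // cycle through the inserted parts
		if getPartFile(entriesTrie, partNumber, "8a034f82cb9cb31140d87d3ce2a9ede3") == "" {
			b.Fatal("partFile returned is empty")
		}
	}
}
```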


@@ -21,7 +21,7 @@ package trie
// Node trie tree node container carries value and children.
type Node struct {
exists bool
value interface{}
value string
child map[rune]*Node // runes as child.
}
@@ -29,7 +29,7 @@ type Node struct {
func newNode() *Node {
return &Node{
exists: false,
value: nil,
value: "",
child: make(map[rune]*Node),
}
}
@@ -65,16 +65,16 @@ func (t *Trie) Insert(key string) {
}
// PrefixMatch - prefix match.
func (t *Trie) PrefixMatch(key string) []interface{} {
func (t *Trie) PrefixMatch(key string) []string {
node, _ := t.findNode(key)
if node != nil {
return t.Walk(node)
if node == nil {
return nil
}
return []interface{}{}
return t.Walk(node)
}
// Walk the tree.
func (t *Trie) Walk(node *Node) (ret []interface{}) {
func (t *Trie) Walk(node *Node) (ret []string) {
if node.exists {
ret = append(ret, node.value)
}
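
The `pkg/trie` changes above specialize the container from `interface{}` to `string`: node values are plain strings, `PrefixMatch` returns `nil` (rather than an empty `[]interface{}`) when no node matches the prefix, and callers no longer box values or type-assert on the way out. A small usage sketch against the new API:

```go
package main

import (
	"fmt"

	"github.com/minio/minio/pkg/trie"
)

func main() {
	t := trie.NewTrie()
	t.Insert("00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864")

	// No interface{} boxing and no value.(string) assertion at call sites.
	for _, match := range t.PrefixMatch("00001.") {
		fmt.Println(match)
	}

	// A miss now returns a nil slice instead of an empty []interface{};
	// ranging over nil is a no-op, so callers such as getPartFile need no
	// special casing.
	fmt.Println(len(t.PrefixMatch("99999.")) == 0) // true
}
```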