Don't copy interesting ids, clean up logging (#11102)

When searching the caches, don't copy the ids; instead, inline the loop.

```
Benchmark_bucketMetacache_findCache-32    	   19200	     63490 ns/op	    8303 B/op	       5 allocs/op
Benchmark_bucketMetacache_findCache-32    	   20338	     58609 ns/op	     111 B/op	       4 allocs/op
```
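
The first line is the previous implementation, the second the new one; nearly all of the per-call allocation came from copying the candidate cache ids into a fresh slice before filtering them. Below is a minimal, standalone sketch of the replacement pattern (walk the prefix segments of the base directory and consume the per-root index in place); the names `visitCandidates` and `byRoot` and the plain `/` separator are illustrative, not the actual MinIO identifiers:

```go
package main

import (
	"fmt"
	"path"
	"strings"
)

// visitCandidates walks every prefix of baseDir ("a/b/c" -> "a", "a/b", "a/b/c")
// and hands the ids indexed under that prefix to fn, without first building an
// intermediate slice of all candidate ids.
func visitCandidates(baseDir string, byRoot map[string][]string, fn func(id string)) {
	parts := strings.Split(baseDir, "/")
	for i := range parts {
		for _, id := range byRoot[path.Join(parts[:i+1]...)] {
			fn(id)
		}
	}
}

func main() {
	byRoot := map[string][]string{
		"prefix":    {"cache-1"},
		"prefix/42": {"cache-2", "cache-3"},
	}
	visitCandidates("prefix/42", byRoot, func(id string) {
		fmt.Println("candidate:", id)
	})
}
```

In the real `findCache` the equivalent inner loop filters each candidate through the new `metacache.matches` helper shown further down.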

Add a reasonable, but still simplistic, benchmark.
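It can be run with the standard Go tooling, for example `go test -run=NONE -bench=Benchmark_bucketMetacache_findCache -benchmem ./cmd/` from the repository root; `-benchmem` is what adds the B/op and allocs/op columns shown above.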

Bonus: nicer zero-alloc debug logging.
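
The debug statements now go through small helpers (`debugf`/`debugln` below) that are gated on the `metacacheDebug` compile-time constant. With the constant set to false, the Go compiler can typically inline the helper, drop the dead branch, and keep the variadic arguments from escaping to the heap, so the call sites cost essentially nothing in normal builds; flipping the constant routes the output through `console.Debugf` and `console.Debugln`.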
Klaus Post 2020-12-14 13:13:33 -08:00 committed by GitHub
parent 8368ab76aa
commit f6fb27e8f0
4 changed files with 176 additions and 166 deletions


@ -73,6 +73,12 @@ func newBucketMetacache(bucket string, cleanup bool) *bucketMetacache {
}
}
func (b *bucketMetacache) debugf(format string, data ...interface{}) {
if metacacheDebug {
console.Debugf(format, data...)
}
}
// loadBucketMetaCache will load the cache from the object layer.
// If the cache cannot be found a new one is created.
func loadBucketMetaCache(ctx context.Context, bucket string) (*bucketMetacache, error) {
@ -199,7 +205,6 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
}
extend := globalAPIConfig.getExtendListLife()
const debugPrint = false
// Grab a write lock, since we create one if we cannot find one.
if o.Create {
@ -212,9 +217,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
// Check if exists already.
if c, ok := b.caches[o.ID]; ok {
if debugPrint {
console.Info("returning existing %v", o.ID)
}
b.debugf("returning existing %v", o.ID)
return c
}
// No need to do expensive checks on transients.
@ -231,103 +234,30 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
best := o.newMetacache()
b.caches[o.ID] = best
b.updated = true
if debugPrint {
console.Info("returning new cache %s, bucket: %v", best.id, best.bucket)
}
b.debugf("returning new cache %s, bucket: %v", best.id, best.bucket)
return best
}
interesting := interestingCaches(o.BaseDir, b.cachesRoot)
var best metacache
for _, id := range interesting {
cached, ok := b.caches[id]
if !ok {
continue
}
// Never return transient caches if there is no id.
if cached.status == scanStateError || cached.status == scanStateNone || cached.dataVersion != metacacheStreamVersion {
if debugPrint {
console.Info("cache %s state or stream version mismatch", cached.id)
}
continue
}
if cached.startedCycle < o.OldestCycle {
if debugPrint {
console.Info("cache %s cycle too old", cached.id)
}
continue
}
// If the existing listing wasn't recursive root must match.
if !cached.recursive && o.BaseDir != cached.root {
if debugPrint {
console.Info("cache %s non rec prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
}
continue
}
// Root of what we are looking for must at least have
if !strings.HasPrefix(o.BaseDir, cached.root) {
if debugPrint {
console.Info("cache %s prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
}
continue
}
if cached.filter != "" && strings.HasPrefix(cached.filter, o.FilterPrefix) {
if debugPrint {
console.Info("cache %s cannot be used because of filter %s", cached.id, cached.filter)
}
continue
}
if o.Recursive && !cached.recursive {
if debugPrint {
console.Info("cache %s not recursive", cached.id)
}
// If this is recursive the cached listing must be as well.
continue
}
if o.Separator != slashSeparator && !cached.recursive {
if debugPrint {
console.Info("cache %s not slashsep and not recursive", cached.id)
}
// Non slash separator requires recursive.
continue
}
if !cached.finished() && time.Since(cached.lastUpdate) > metacacheMaxRunningAge {
if debugPrint {
console.Info("cache %s not running, time: %v", cached.id, time.Since(cached.lastUpdate))
}
// Abandoned
continue
}
if cached.finished() && cached.endedCycle <= o.OldestCycle {
if extend <= 0 {
// If scan has ended the oldest requested must be less.
if debugPrint {
console.Info("cache %s ended and cycle (%v) <= oldest allowed (%v)", cached.id, cached.endedCycle, o.OldestCycle)
}
continue
}
if time.Since(cached.lastUpdate) > metacacheMaxRunningAge+extend {
// Cache ended within bloom cycle, but we can extend the life.
if debugPrint {
console.Info("cache %s ended (%v) and beyond extended life (%v)", cached.id, cached.lastUpdate, extend+metacacheMaxRunningAge)
}
continue
}
}
if cached.started.Before(best.started) {
if debugPrint {
console.Info("cache %s disregarded - we have a better", cached.id)
}
// If we already have a newer, keep that.
continue
}
best = cached
}
rootSplit := strings.Split(o.BaseDir, slashSeparator)
for i := range rootSplit {
interesting := b.cachesRoot[path.Join(rootSplit[:i+1]...)]
for _, id := range interesting {
cached, ok := b.caches[id]
if !ok {
continue
}
if !cached.matches(&o, extend) {
continue
}
if cached.started.Before(best.started) {
b.debugf("cache %s disregarded - we have a better", cached.id)
// If we already have a newer, keep that.
continue
}
best = cached
}
}
if !best.started.IsZero() {
if o.Create {
@ -335,9 +265,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
b.caches[best.id] = best
b.updated = true
}
if debugPrint {
console.Info("returning cached %s, status: %v, ended: %v", best.id, best.status, best.ended)
}
b.debugf("returning cached %s, status: %v, ended: %v", best.id, best.status, best.ended)
return best
}
if !o.Create {
@ -353,9 +281,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
b.caches[o.ID] = best
b.cachesRoot[best.root] = append(b.cachesRoot[best.root], best.id)
b.updated = true
if debugPrint {
console.Info("returning new cache %s, bucket: %v", best.id, best.bucket)
}
b.debugf("returning new cache %s, bucket: %v", best.id, best.bucket)
return best
}
@ -365,8 +291,6 @@ func (b *bucketMetacache) cleanup() {
remove := make(map[string]struct{})
currentCycle := intDataUpdateTracker.current()
const debugPrint = false
// Test on a copy
// cleanup is the only one deleting caches.
caches, rootIdx := b.cloneCaches()
@ -378,9 +302,7 @@ func (b *bucketMetacache) cleanup() {
continue
}
if !cache.worthKeeping(currentCycle) {
if debugPrint {
logger.Info("cache %s not worth keeping", id)
}
b.debugf("cache %s not worth keeping", id)
remove[id] = struct{}{}
continue
}
@ -418,15 +340,11 @@ func (b *bucketMetacache) cleanup() {
}
if cache.canBeReplacedBy(&cache2) {
if debugPrint {
logger.Info("cache %s can be replaced by %s", id, cache2.id)
}
b.debugf("cache %s can be replaced by %s", id, cache2.id)
remove[id] = struct{}{}
break
} else {
if debugPrint {
logger.Info("cache %s can be NOT replaced by %s", id, cache2.id)
}
b.debugf("cache %s can be NOT replaced by %s", id, cache2.id)
}
}
}
@ -515,9 +433,6 @@ func (b *bucketMetacache) getCache(id string) *metacache {
// deleteAll will delete all on disk data for ALL caches.
// Deletes are performed concurrently.
func (b *bucketMetacache) deleteAll() {
b.mu.Lock()
defer b.mu.Unlock()
ctx := context.Background()
ez, ok := newObjectLayerFn().(*erasureServerPools)
if !ok {
@ -525,6 +440,9 @@ func (b *bucketMetacache) deleteAll() {
return
}
b.mu.Lock()
defer b.mu.Unlock()
b.updated = true
if !b.transient {
// Delete all.


@ -0,0 +1,55 @@
package cmd
import (
"fmt"
"testing"
)
func Benchmark_bucketMetacache_findCache(b *testing.B) {
bm := newBucketMetacache("", false)
const elements = 50000
const paths = 100
if elements%paths != 0 {
b.Fatal("elements must be divisible by the number of paths")
}
var pathNames [paths]string
for i := range pathNames[:] {
pathNames[i] = fmt.Sprintf("prefix/%d", i)
}
for i := 0; i < elements; i++ {
bm.findCache(listPathOptions{
ID: mustGetUUID(),
Bucket: "",
BaseDir: pathNames[i%paths],
Prefix: "",
FilterPrefix: "",
Marker: "",
Limit: 0,
AskDisks: 0,
Recursive: false,
Separator: slashSeparator,
Create: true,
CurrentCycle: uint64(i),
OldestCycle: uint64(i - 1),
})
}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bm.findCache(listPathOptions{
ID: mustGetUUID(),
Bucket: "",
BaseDir: pathNames[i%paths],
Prefix: "",
FilterPrefix: "",
Marker: "",
Limit: 0,
AskDisks: 0,
Recursive: false,
Separator: slashSeparator,
Create: true,
CurrentCycle: uint64(i % elements),
OldestCycle: uint64(0),
})
}
}


@ -29,6 +29,7 @@ import (
"sync"
"time"
jsoniter "github.com/json-iterator/go"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/console"
"github.com/minio/minio/pkg/hash"
@ -122,11 +123,22 @@ func (o listPathOptions) newMetacache() metacache {
}
}
func (o *listPathOptions) debugf(format string, data ...interface{}) {
if metacacheDebug {
console.Debugf(format, data...)
}
}
func (o *listPathOptions) debugln(data ...interface{}) {
if metacacheDebug {
console.Debugln(data...)
}
}
// gatherResults will collect all results on the input channel and filter results according to the options.
// Caller should close the channel when done.
// The returned function will return the results once there is enough or input is closed.
func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) {
const debugPrint = false
var resultsDone = make(chan metaCacheEntriesSorted)
// Copy so we can mutate
resCh := resultsDone
@ -142,32 +154,22 @@ func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCa
if !o.IncludeDirectories && entry.isDir() {
continue
}
if debugPrint {
console.Infoln("gather got:", entry.name)
}
o.debugln("gather got:", entry.name)
if o.Marker != "" && entry.name <= o.Marker {
if debugPrint {
console.Infoln("pre marker")
}
o.debugln("pre marker")
continue
}
if !strings.HasPrefix(entry.name, o.Prefix) {
if debugPrint {
console.Infoln("not in prefix")
}
o.debugln("not in prefix")
continue
}
if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) {
if debugPrint {
console.Infoln("not in dir", o.Prefix, o.Separator)
}
o.debugln("not in dir", o.Prefix, o.Separator)
continue
}
if !o.InclDeleted && entry.isObject() {
if entry.isLatestDeletemarker() {
if debugPrint {
console.Infoln("latest delete")
}
o.debugln("latest delete")
continue
}
}
@ -181,9 +183,7 @@ func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCa
}
continue
}
if debugPrint {
console.Infoln("adding...")
}
o.debugln("adding...")
results.o = append(results.o, entry)
}
if resCh != nil {
@ -207,19 +207,15 @@ func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
if search == "" {
return 0, nil
}
const debugPrint = false
if debugPrint {
console.Infoln("searching for ", search)
}
o.debugln("searching for ", search)
var tmp metacacheBlock
var json = jsoniter.ConfigCompatibleWithStandardLibrary
i := 0
for {
partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i)
v, ok := fi.Metadata[partKey]
if !ok {
if debugPrint {
console.Infoln("no match in metadata, waiting")
}
o.debugln("no match in metadata, waiting")
return -1, io.ErrUnexpectedEOF
}
err := json.Unmarshal([]byte(v), &tmp)
@ -231,27 +227,18 @@ func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
return 0, errFileNotFound
}
if tmp.First >= search {
if debugPrint {
console.Infoln("First >= search", v)
}
o.debugln("First >= search", v)
return i, nil
}
if tmp.Last >= search {
if debugPrint {
console.Infoln("Last >= search", v)
}
o.debugln("Last >= search", v)
return i, nil
}
if tmp.EOS {
if debugPrint {
console.Infoln("no match, at EOS", v)
}
o.debugln("no match, at EOS", v)
return -3, io.EOF
}
if debugPrint {
console.Infoln("First ", tmp.First, "<", search, " search", i)
}
o.debugln("First ", tmp.First, "<", search, " search", i)
i++
}
}
@ -314,7 +301,6 @@ func (o *listPathOptions) SetFilter() {
// Will return io.EOF if there are no more entries with the same filter.
// The last entry can be used as a marker to resume the listing.
func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) {
const debugPrint = false
// Forward to prefix, if any
err = r.forwardTo(o.Prefix)
if err != nil {
@ -336,9 +322,8 @@ func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSor
}
}
}
if debugPrint {
console.Infoln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator)
}
o.debugln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator)
// Filter
if !o.Recursive {
entries.o = make(metaCacheEntries, 0, o.Limit)
@ -557,10 +542,7 @@ func (er erasureObjects) SetDriveCount() int {
// Will return io.EOF if continuing would not yield more results.
func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
const debugPrint = false
if debugPrint {
console.Printf("listPath with options: %#v\n", o)
}
o.debugf("listPath with options: %#v\n", o)
// See if we have the listing stored.
if !o.Create && !o.singleObject {
@ -586,9 +568,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
rpc := globalNotificationSys.restClientFromHash(o.Bucket)
var metaMu sync.Mutex
if debugPrint {
console.Println("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker)
}
o.debugln("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker)
// Disconnect from call above, but cancel on exit.
ctx, cancel := context.WithCancel(GlobalContext)
@ -596,9 +576,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
disks := er.getOnlineDisks()
defer func() {
if debugPrint {
console.Println("listPath returning:", entries.len(), "err:", err)
}
o.debugln("listPath returning:", entries.len(), "err:", err)
if err != nil && !errors.Is(err, io.EOF) {
go func(err string) {
metaMu.Lock()
@ -684,9 +662,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
// Don't save single object listings.
return nil
}
if debugPrint {
console.Println("listPath: saving block", b.n, "to", o.objectPath(b.n))
}
o.debugln("listPath: saving block", b.n, "to", o.objectPath(b.n))
r, err := hash.NewReader(bytes.NewBuffer(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false)
logger.LogIf(ctx, err)
custom := b.headerKV()


@ -46,6 +46,9 @@ const (
// Enabling this will make cache sharing more likely and cause less IO,
// but may cause additional latency to some calls.
metacacheSharePrefix = false
// metacacheDebug will enable debug printing
metacacheDebug = false
)
//go:generate msgp -file $GOFILE -unexported
@ -73,6 +76,64 @@ func (m *metacache) finished() bool {
return !m.ended.IsZero()
}
// matches returns whether the metacache matches the options given.
func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool {
if o == nil {
return false
}
// Never return transient caches if there is no id.
if m.status == scanStateError || m.status == scanStateNone || m.dataVersion != metacacheStreamVersion {
o.debugf("cache %s state or stream version mismatch", m.id)
return false
}
if m.startedCycle < o.OldestCycle {
o.debugf("cache %s cycle too old", m.id)
return false
}
// Root of what we are looking for must at least have the same
if !strings.HasPrefix(o.BaseDir, m.root) {
o.debugf("cache %s prefix mismatch, cached:%v, want:%v", m.id, m.root, o.BaseDir)
return false
}
if m.filter != "" && strings.HasPrefix(m.filter, o.FilterPrefix) {
o.debugf("cache %s cannot be used because of filter %s", m.id, m.filter)
return false
}
if o.Recursive && !m.recursive {
o.debugf("cache %s not recursive", m.id)
// If this is recursive the cached listing must be as well.
return false
}
if o.Separator != slashSeparator && !m.recursive {
o.debugf("cache %s not slashsep and not recursive", m.id)
// Non slash separator requires recursive.
return false
}
if !m.finished() && time.Since(m.lastUpdate) > metacacheMaxRunningAge {
o.debugf("cache %s not running, time: %v", m.id, time.Since(m.lastUpdate))
// Abandoned
return false
}
if m.finished() && m.endedCycle <= o.OldestCycle {
if extend <= 0 {
// If scan has ended the oldest requested must be less.
o.debugf("cache %s ended and cycle (%v) <= oldest allowed (%v)", m.id, m.endedCycle, o.OldestCycle)
return false
}
if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend {
// Cache ended within bloom cycle, but we can extend the life.
o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, extend+metacacheMaxRunningAge)
return false
}
}
return true
}
// worthKeeping indicates if the cache by itself is worth keeping.
func (m *metacache) worthKeeping(currentCycle uint64) bool {
if m == nil {