minio/cmd/metacache-walk.go
Harshavardhana 445a9bd827
fix: heal optimizations in crawler to avoid multiple healing attempts (#11173)
Fixes two problems

- Double healing when bitrot is enabled, instead heal attempt
  once in applyActions() before lifecycle is applied.

- If applyActions() is successful and getSize() returns proper
  value, then object is accounted for and should be removed
  from the oldCache namespace map to avoid double heal attempts.
2020-12-28 10:31:00 -08:00

295 lines
8.6 KiB
Go

/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"sort"
"strconv"
"strings"
"sync/atomic"
"github.com/gorilla/mux"
"github.com/minio/minio/cmd/logger"
)
// WalkDirOptions provides options for WalkDir operations.
type WalkDirOptions struct {
// Bucket to crawl
Bucket string
// Directory inside the bucket.
BaseDir string
// Do a full recursive scan.
Recursive bool
// ReportNotFound will return errFileNotFound if all disks reports the BaseDir cannot be found.
ReportNotFound bool
// FilterPrefix will only return results with given prefix within folder.
// Should never contain a slash.
FilterPrefix string
}
// WalkDir will traverse a directory and return all entries found.
// On success a sorted meta cache stream will be returned.
func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
atomic.AddInt32(&s.activeIOCount, 1)
defer func() {
atomic.AddInt32(&s.activeIOCount, -1)
}()
// Verify if volume is valid and it exists.
volumeDir, err := s.getVolDir(opts.Bucket)
if err != nil {
return err
}
// Stat a volume entry.
_, err = os.Stat(volumeDir)
if err != nil {
if osIsNotExist(err) {
return errVolumeNotFound
} else if isSysErrIO(err) {
return errFaultyDisk
}
return err
}
// Fast exit track to check if we are listing an object with
// a trailing slash, this will avoid to list the object content.
if HasSuffix(opts.BaseDir, SlashSeparator) {
if st, err := os.Stat(pathJoin(volumeDir, opts.BaseDir, xlStorageFormatFile)); err == nil && st.Mode().IsRegular() {
return errFileNotFound
}
}
// Use a small block size to start sending quickly
w := newMetacacheWriter(wr, 16<<10)
defer w.Close()
out, err := w.stream()
if err != nil {
return err
}
defer close(out)
prefix := opts.FilterPrefix
var scanDir func(path string) error
scanDir = func(current string) error {
entries, err := s.ListDir(ctx, opts.Bucket, current, -1)
if err != nil {
// Folder could have gone away in-between
if err != errVolumeNotFound && err != errFileNotFound {
logger.LogIf(ctx, err)
}
if opts.ReportNotFound && err == errFileNotFound && current == opts.BaseDir {
return errFileNotFound
}
// Forward some errors?
return nil
}
dirObjects := make(map[string]struct{})
for i, entry := range entries {
if len(prefix) > 0 && !strings.HasPrefix(entry, prefix) {
continue
}
if strings.HasSuffix(entry, slashSeparator) {
if strings.HasSuffix(entry, globalDirSuffixWithSlash) {
// Add without extension so it is sorted correctly.
entry = strings.TrimSuffix(entry, globalDirSuffixWithSlash) + slashSeparator
dirObjects[entry] = struct{}{}
entries[i] = entry
continue
}
// Trim slash, maybe compiler is clever?
entries[i] = entries[i][:len(entry)-1]
continue
}
// Do do not retain the file.
entries[i] = ""
// If root was an object return it as such.
if HasSuffix(entry, xlStorageFormatFile) {
var meta metaCacheEntry
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, current, entry))
if err != nil {
logger.LogIf(ctx, err)
continue
}
meta.name = strings.TrimSuffix(entry, xlStorageFormatFile)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
meta.name = pathJoin(current, meta.name)
meta.name = decodeDirObject(meta.name)
out <- meta
return nil
}
// Check legacy.
if HasSuffix(entry, xlStorageFormatFileV1) {
var meta metaCacheEntry
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, current, entry))
if err != nil {
logger.LogIf(ctx, err)
continue
}
meta.name = strings.TrimSuffix(entry, xlStorageFormatFileV1)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
meta.name = pathJoin(current, meta.name)
out <- meta
return nil
}
// Skip all other files.
}
// Process in sort order.
sort.Strings(entries)
dirStack := make([]string, 0, 5)
prefix = "" // Remove prefix after first level.
for _, entry := range entries {
if entry == "" {
continue
}
meta := metaCacheEntry{name: PathJoin(current, entry)}
// If directory entry on stack before this, pop it now.
for len(dirStack) > 0 && dirStack[len(dirStack)-1] < meta.name {
pop := dirStack[len(dirStack)-1]
out <- metaCacheEntry{name: pop}
if opts.Recursive {
// Scan folder we found. Should be in correct sort order where we are.
err := scanDir(pop)
logger.LogIf(ctx, err)
}
dirStack = dirStack[:len(dirStack)-1]
}
// All objects will be returned as directories, there has been no object check yet.
// Check it by attempting to read metadata.
_, isDirObj := dirObjects[entry]
if isDirObj {
meta.name = meta.name[:len(meta.name)-1] + globalDirSuffixWithSlash
}
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFile))
switch {
case err == nil:
// It was an object
if isDirObj {
meta.name = strings.TrimSuffix(meta.name, globalDirSuffixWithSlash) + slashSeparator
}
out <- meta
case osIsNotExist(err):
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1))
if err == nil {
// Maybe rename? Would make it inconsistent across disks though.
// os.Rename(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1), pathJoin(volumeDir, meta.name, xlStorageFormatFile))
// It was an object
out <- meta
continue
}
// NOT an object, append to stack (with slash)
// If dirObject, but no metadata (which is unexpected) we skip it.
if !isDirObj {
dirStack = append(dirStack, meta.name+slashSeparator)
}
case isSysErrNotDir(err):
// skip
default:
logger.LogIf(ctx, err)
}
}
// If directory entry left on stack, pop it now.
for len(dirStack) > 0 {
pop := dirStack[len(dirStack)-1]
out <- metaCacheEntry{name: pop}
if opts.Recursive {
// Scan folder we found. Should be in correct sort order where we are.
err := scanDir(pop)
logger.LogIf(ctx, err)
}
dirStack = dirStack[:len(dirStack)-1]
}
return nil
}
// Stream output.
return scanDir(opts.BaseDir)
}
func (p *xlStorageDiskIDCheck) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
if err := p.checkDiskStale(); err != nil {
return err
}
return p.storage.WalkDir(ctx, opts, wr)
}
// WalkDir will traverse a directory and return all entries found.
// On success a meta cache stream will be returned, that should be closed when done.
func (client *storageRESTClient) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
values := make(url.Values)
values.Set(storageRESTVolume, opts.Bucket)
values.Set(storageRESTDirPath, opts.BaseDir)
values.Set(storageRESTRecursive, strconv.FormatBool(opts.Recursive))
values.Set(storageRESTReportNotFound, strconv.FormatBool(opts.ReportNotFound))
values.Set(storageRESTPrefixFilter, opts.FilterPrefix)
respBody, err := client.call(ctx, storageRESTMethodWalkDir, values, nil, -1)
if err != nil {
logger.LogIf(ctx, err)
return err
}
return waitForHTTPStream(respBody, wr)
}
// WalkDirHandler - remote caller to list files and folders in a requested directory path.
func (s *storageRESTServer) WalkDirHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
return
}
vars := mux.Vars(r)
volume := vars[storageRESTVolume]
dirPath := vars[storageRESTDirPath]
recursive, err := strconv.ParseBool(vars[storageRESTRecursive])
if err != nil {
s.writeErrorResponse(w, err)
return
}
var reportNotFound bool
if v := vars[storageRESTReportNotFound]; v != "" {
reportNotFound, err = strconv.ParseBool(v)
if err != nil {
s.writeErrorResponse(w, err)
return
}
}
prefix := r.URL.Query().Get(storageRESTPrefixFilter)
writer := streamHTTPResponse(w)
writer.CloseWithError(s.storage.WalkDir(r.Context(), WalkDirOptions{
Bucket: volume,
BaseDir: dirPath,
Recursive: recursive,
ReportNotFound: reportNotFound,
FilterPrefix: prefix,
}, writer))
}