trace: Add storage & OS tracing (#11889)

Anis Elleuch 2021-03-27 07:24:07 +01:00 committed by GitHub
parent 95096e31a7
commit d8b5adfd10
17 changed files with 362 additions and 203 deletions

@ -1031,26 +1031,75 @@ func toAdminAPIErr(ctx context.Context, err error) APIError {
// false if certain conditions are not met.
// - input entry is not of the type *trace.Info*
// - errOnly entries are to be traced, not status code 2xx, 3xx.
// - all entries to be traced, if not trace only S3 API requests.
func mustTrace(entry interface{}, trcAll, errOnly bool) bool {
// - trace.Info type is asked by opts
func mustTrace(entry interface{}, opts madmin.ServiceTraceOpts) (shouldTrace bool) {
trcInfo, ok := entry.(trace.Info)
if !ok {
return false
}
// Handle browser requests separately filter them and return.
if HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+"/upload") {
if errOnly {
return trcInfo.RespInfo.StatusCode >= http.StatusBadRequest
// Override shouldTrace decision with errOnly filtering
defer func() {
if shouldTrace && opts.OnlyErrors {
shouldTrace = trcInfo.RespInfo.StatusCode >= http.StatusBadRequest
}
}()
if opts.Threshold > 0 {
var latency time.Duration
switch trcInfo.TraceType {
case trace.OS:
latency = trcInfo.OSStats.Duration
case trace.Storage:
latency = trcInfo.StorageStats.Duration
case trace.HTTP:
latency = trcInfo.CallStats.Latency
}
if latency < opts.Threshold {
return false
}
}
if opts.Internal && trcInfo.TraceType == trace.HTTP && HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) {
return true
}
trace := trcAll || !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator)
if errOnly {
return trace && trcInfo.RespInfo.StatusCode >= http.StatusBadRequest
if opts.S3 && trcInfo.TraceType == trace.HTTP && !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) {
return true
}
return trace
if opts.Storage && trcInfo.TraceType == trace.Storage {
return true
}
return opts.OS && trcInfo.TraceType == trace.OS
}
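The filter above can be exercised with a small test-style sketch (the editor's, not part of this commit). It assumes package cmd, the in-tree pkg/madmin and pkg/trace import paths, and the types introduced in this diff: a storage entry passes when Storage tracing is enabled, and is dropped once the latency threshold exceeds the call's duration.

package cmd

import (
	"testing"
	"time"

	"github.com/minio/minio/pkg/madmin"
	trace "github.com/minio/minio/pkg/trace"
)

func TestMustTraceStorageFilter(t *testing.T) {
	entry := trace.Info{
		TraceType: trace.Storage,
		FuncName:  "ReadAll",
		StorageStats: trace.StorageStats{
			Path:     "mybucket/myobject",
			Duration: 3 * time.Millisecond,
		},
	}

	// Storage tracing enabled, no threshold: the entry is traced.
	if !mustTrace(entry, madmin.ServiceTraceOpts{Storage: true}) {
		t.Fatal("expected storage entry to be traced")
	}

	// Threshold larger than the call's duration: the entry is filtered out.
	if mustTrace(entry, madmin.ServiceTraceOpts{Storage: true, Threshold: 10 * time.Millisecond}) {
		t.Fatal("expected entry below the latency threshold to be dropped")
	}
}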
func extractTraceOptions(r *http.Request) (opts madmin.ServiceTraceOpts, err error) {
q := r.URL.Query()
opts.OnlyErrors = q.Get("err") == "true"
opts.S3 = q.Get("s3") == "true"
opts.Internal = q.Get("internal") == "true"
opts.Storage = q.Get("storage") == "true"
opts.OS = q.Get("os") == "true"
// Support deprecated 'all' query
if q.Get("all") == "true" {
opts.S3 = true
opts.Internal = true
opts.Storage = true
opts.OS = true
}
if t := q.Get("threshold"); t != "" {
d, err := time.ParseDuration(t)
if err != nil {
return opts, err
}
opts.Threshold = d
}
return
}
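For reference, a hedged usage sketch of the parser above (editor's; package cmd, needs "fmt", "net/http" and "net/http/httptest"). The query keys mirror the ServiceTraceOpts fields added to pkg/madmin further down in this commit.

// Sketch only: the admin trace endpoint now accepts per-type flags and an
// optional latency threshold on the query string.
func exampleExtractTraceOptions() {
	r := httptest.NewRequest(http.MethodPost,
		"/minio/admin/v3/trace?s3=true&storage=true&os=true&err=true&threshold=100ms", nil)

	opts, err := extractTraceOptions(r)
	if err != nil {
		fmt.Println("invalid trace options:", err)
		return
	}

	// S3, Storage, OS and OnlyErrors are true, Threshold is 100ms; Internal
	// stays false unless internal=true (or the deprecated all=true) is sent.
	fmt.Printf("%+v\n", opts)
}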
// TraceHandler - POST /minio/admin/v3/trace
@ -1059,9 +1108,6 @@ func mustTrace(entry interface{}, trcAll, errOnly bool) bool {
func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "HTTPTrace")
trcAll := r.URL.Query().Get("all") == "true"
trcErr := r.URL.Query().Get("err") == "true"
// Validate request signature.
_, adminAPIErr := checkAdminRequestAuth(ctx, r, iampolicy.TraceAdminAction, "")
if adminAPIErr != ErrNone {
@ -1069,6 +1115,12 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
return
}
traceOpts, err := extractTraceOptions(r)
if err != nil {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrInvalidRequest), r.URL)
return
}
setEventStreamHeaders(w)
// Trace Publisher and peer-trace-client use nonblocking send and hence do not wait for slow receivers.
@ -1077,15 +1129,15 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
peers, _ := newPeerRestClients(globalEndpoints)
globalHTTPTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool {
return mustTrace(entry, trcAll, trcErr)
globalTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool {
return mustTrace(entry, traceOpts)
})
for _, peer := range peers {
if peer == nil {
continue
}
peer.Trace(traceCh, ctx.Done(), trcAll, trcErr)
peer.Trace(traceCh, ctx.Done(), traceOpts)
}
keepAliveTicker := time.NewTicker(500 * time.Millisecond)

@ -66,7 +66,7 @@ func waitForLowHTTPReq(maxIO int, maxWait time.Duration) {
// Bucket notification and http trace are not costly; it is okay to ignore them
// while counting the number of concurrent connections
maxIOFn := func() int {
return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalHTTPTrace.NumSubscribers())
return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalTrace.NumSubscribers())
}
tmpMaxWait := maxWait

@ -23,23 +23,21 @@ const (
// Top level common ENVs
const (
EnvAccessKey = "MINIO_ACCESS_KEY"
EnvSecretKey = "MINIO_SECRET_KEY"
EnvRootUser = "MINIO_ROOT_USER"
EnvRootPassword = "MINIO_ROOT_PASSWORD"
EnvAccessKeyOld = "MINIO_ACCESS_KEY_OLD"
EnvSecretKeyOld = "MINIO_SECRET_KEY_OLD"
EnvRootUserOld = "MINIO_ROOT_USER_OLD"
EnvRootPasswordOld = "MINIO_ROOT_PASSWORD_OLD"
EnvBrowser = "MINIO_BROWSER"
EnvDomain = "MINIO_DOMAIN"
EnvRegionName = "MINIO_REGION_NAME"
EnvPublicIPs = "MINIO_PUBLIC_IPS"
EnvFSOSync = "MINIO_FS_OSYNC"
EnvArgs = "MINIO_ARGS"
EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT"
EnvLogPosixTimes = "MINIO_LOG_POSIX_TIMES"
EnvLogPosixThresholdInMS = "MINIO_LOG_POSIX_THRESHOLD_MS"
EnvAccessKey = "MINIO_ACCESS_KEY"
EnvSecretKey = "MINIO_SECRET_KEY"
EnvRootUser = "MINIO_ROOT_USER"
EnvRootPassword = "MINIO_ROOT_PASSWORD"
EnvAccessKeyOld = "MINIO_ACCESS_KEY_OLD"
EnvSecretKeyOld = "MINIO_SECRET_KEY_OLD"
EnvRootUserOld = "MINIO_ROOT_USER_OLD"
EnvRootPasswordOld = "MINIO_ROOT_PASSWORD_OLD"
EnvBrowser = "MINIO_BROWSER"
EnvDomain = "MINIO_DOMAIN"
EnvRegionName = "MINIO_REGION_NAME"
EnvPublicIPs = "MINIO_PUBLIC_IPS"
EnvFSOSync = "MINIO_FS_OSYNC"
EnvArgs = "MINIO_ARGS"
EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT"
EnvUpdate = "MINIO_UPDATE"

@ -181,9 +181,9 @@ var (
globalHTTPServerErrorCh = make(chan error)
globalOSSignalCh = make(chan os.Signal, 1)
// global Trace system to send HTTP request/response logs to
// registered listeners
globalHTTPTrace = pubsub.New()
// global Trace system to send HTTP request/response
// and Storage/OS calls info to registered listeners.
globalTrace = pubsub.New()
// global Listen system to send S3 API events to registered listeners
globalHTTPListen = pubsub.New()

@ -359,24 +359,24 @@ func extractPostPolicyFormValues(ctx context.Context, form *multipart.Form) (fil
// Log headers and body.
func httpTraceAll(f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if globalHTTPTrace.NumSubscribers() == 0 {
if globalTrace.NumSubscribers() == 0 {
f.ServeHTTP(w, r)
return
}
trace := Trace(f, true, w, r)
globalHTTPTrace.Publish(trace)
globalTrace.Publish(trace)
}
}
// Log only the headers.
func httpTraceHdrs(f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if globalHTTPTrace.NumSubscribers() == 0 {
if globalTrace.NumSubscribers() == 0 {
f.ServeHTTP(w, r)
return
}
trace := Trace(f, false, w, r)
globalHTTPTrace.Publish(trace)
globalTrace.Publish(trace)
}
}

@ -124,7 +124,8 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info {
reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ","))
}
t := trace.Info{FuncName: name}
now := time.Now().UTC()
t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now}
t.NodeName = r.Host
if globalIsDistErasure {
t.NodeName = globalLocalNodeName
@ -137,7 +138,7 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info {
vars := mux.Vars(r)
rq := trace.RequestInfo{
Time: time.Now().UTC(),
Time: now,
Proto: r.Proto,
Method: r.Method,
Path: SlashSeparator + pathJoin(vars["bucket"], vars["object"]),
@ -185,10 +186,12 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ
reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ","))
}
var reqBodyRecorder *recordRequest
t := trace.Info{FuncName: name}
reqBodyRecorder = &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders}
reqBodyRecorder := &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders}
r.Body = ioutil.NopCloser(reqBodyRecorder)
now := time.Now().UTC()
t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now}
t.NodeName = r.Host
if globalIsDistErasure {
t.NodeName = globalLocalNodeName
@ -197,8 +200,9 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ
if host, _, err := net.SplitHostPort(t.NodeName); err == nil {
t.NodeName = host
}
rq := trace.RequestInfo{
Time: time.Now().UTC(),
Time: now,
Proto: r.Proto,
Method: r.Method,
Path: r.URL.Path,

@ -17,145 +17,109 @@
package cmd
import (
"fmt"
"os"
"strings"
"time"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/env"
trace "github.com/minio/minio/pkg/trace"
)
var (
logTime bool = false
threshold time.Duration
//go:generate stringer -type=osMetric -trimprefix=osMetric $GOFILE
type osMetric uint8
const (
osMetricRemoveAll osMetric = iota
osMetricMkdirAll
osMetricRename
osMetricOpenFile
osMetricOpen
osMetricOpenFileDirectIO
osMetricLstat
osMetricRemove
osMetricStat
// .... add more
osMetricLast
)
func init() {
logTime = env.IsSet(config.EnvLogPosixTimes)
t, _ := env.GetInt(
config.EnvLogPosixThresholdInMS,
100,
)
threshold = time.Duration(t) * time.Millisecond
func osTrace(s osMetric, startTime time.Time, duration time.Duration, path string) trace.Info {
return trace.Info{
TraceType: trace.OS,
Time: startTime,
NodeName: globalLocalNodeName,
FuncName: s.String(),
OSStats: trace.OSStats{
Duration: duration,
Path: path,
},
}
}
func reportTime(name *strings.Builder, startTime time.Time) {
delta := time.Since(startTime)
if delta > threshold {
name.WriteString(" ")
name.WriteString(delta.String())
fmt.Println(name.String())
func updateOSMetrics(s osMetric, paths ...string) func() {
if globalTrace.NumSubscribers() == 0 {
return func() {}
}
startTime := time.Now()
return func() {
duration := time.Since(startTime)
globalTrace.Publish(osTrace(s, startTime, duration, strings.Join(paths, " ")))
}
}
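The defer updateOSMetrics(...)() pattern used by the wrappers below works because a deferred call's arguments — here, the closure returned by updateOSMetrics — are evaluated immediately, so the clock starts on entry while the closure itself only runs when the wrapper returns. A standalone illustration of the idiom (editor's sketch, standard library only):

package main

import (
	"fmt"
	"time"
)

// timed starts a timer immediately and returns a closure that reports the
// elapsed time when invoked; defer timed("work")() therefore measures the
// whole enclosing function, exactly like updateOSMetrics above.
func timed(name string) func() {
	start := time.Now()
	return func() {
		fmt.Println(name, "took", time.Since(start))
	}
}

func work() {
	defer timed("work")() // timed() runs now, the returned closure runs at return
	time.Sleep(50 * time.Millisecond)
}

func main() {
	work()
}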
// RemoveAll captures time taken to call the underlying os.RemoveAll
func RemoveAll(dirPath string) error {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.RemoveAll: ")
s.WriteString(dirPath)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricRemoveAll, dirPath)()
return os.RemoveAll(dirPath)
}
// MkdirAll captures time taken to call os.MkdirAll
func MkdirAll(dirPath string, mode os.FileMode) error {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.MkdirAll: ")
s.WriteString(dirPath)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricMkdirAll, dirPath)()
return os.MkdirAll(dirPath, mode)
}
// Rename captures time taken to call os.Rename
func Rename(src, dst string) error {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Rename: ")
s.WriteString(src)
s.WriteString(" to ")
s.WriteString(dst)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricRename, src, dst)()
return os.Rename(src, dst)
}
// OpenFile captures time taken to call os.OpenFile
func OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.OpenFile: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricOpenFile, name)()
return os.OpenFile(name, flag, perm)
}
// Open captures time taken to call os.Open
func Open(name string) (*os.File, error) {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Open: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricOpen, name)()
return os.Open(name)
}
// OpenFileDirectIO captures time taken to call disk.OpenFileDirectIO
func OpenFileDirectIO(name string, flag int, perm os.FileMode) (*os.File, error) {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("disk.OpenFileDirectIO: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricOpenFileDirectIO, name)()
return disk.OpenFileDirectIO(name, flag, perm)
}
// Lstat captures time taken to call os.Lstat
func Lstat(name string) (os.FileInfo, error) {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Lstat: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricLstat, name)()
return os.Lstat(name)
}
// Remove captures time taken to call os.Remove
func Remove(deletePath string) error {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Remove: ")
s.WriteString(deletePath)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricRemove, deletePath)()
return os.Remove(deletePath)
}
// Stat captures time taken to call os.Stat
func Stat(name string) (os.FileInfo, error) {
if logTime {
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Stat: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
defer updateOSMetrics(osMetricStat, name)()
return os.Stat(name)
}
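Extending the instrumentation to another call follows the same shape. The wrapper below is hypothetical (not part of this commit); osMetricTruncate would also need to be added to the osMetric enum before osMetricLast and the stringer output regenerated.

// Truncate would capture time taken to call os.Truncate (hypothetical example).
func Truncate(name string, size int64) error {
	defer updateOSMetrics(osMetricTruncate, name)() // osMetricTruncate: hypothetical enum value
	return os.Truncate(name, size)
}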

cmd/osmetric_string.go (new file, 32 lines)

@ -0,0 +1,32 @@
// Code generated by "stringer -type=osMetric -trimprefix=osMetric os-instrumented.go"; DO NOT EDIT.
package cmd
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[osMetricRemoveAll-0]
_ = x[osMetricMkdirAll-1]
_ = x[osMetricRename-2]
_ = x[osMetricOpenFile-3]
_ = x[osMetricOpen-4]
_ = x[osMetricOpenFileDirectIO-5]
_ = x[osMetricLstat-6]
_ = x[osMetricRemove-7]
_ = x[osMetricStat-8]
_ = x[osMetricLast-9]
}
const _osMetric_name = "RemoveAllMkdirAllRenameOpenFileOpenOpenFileDirectIOLstatRemoveStatLast"
var _osMetric_index = [...]uint8{0, 9, 17, 23, 31, 35, 51, 56, 62, 66, 70}
func (i osMetric) String() string {
if i >= osMetric(len(_osMetric_index)-1) {
return "osMetric(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _osMetric_name[_osMetric_index[i]:_osMetric_index[i+1]]
}

@ -682,10 +682,14 @@ func (client *peerRESTClient) UpdateMetacacheListing(ctx context.Context, m meta
}
func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) {
func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) {
values := make(url.Values)
values.Set(peerRESTTraceAll, strconv.FormatBool(trcAll))
values.Set(peerRESTTraceErr, strconv.FormatBool(trcErr))
values.Set(peerRESTTraceErr, strconv.FormatBool(traceOpts.OnlyErrors))
values.Set(peerRESTTraceS3, strconv.FormatBool(traceOpts.S3))
values.Set(peerRESTTraceStorage, strconv.FormatBool(traceOpts.Storage))
values.Set(peerRESTTraceOS, strconv.FormatBool(traceOpts.OS))
values.Set(peerRESTTraceInternal, strconv.FormatBool(traceOpts.Internal))
values.Set(peerRESTTraceThreshold, traceOpts.Threshold.String())
// To cancel the REST request in case doneCh gets closed.
ctx, cancel := context.WithCancel(GlobalContext)
@ -779,10 +783,10 @@ func (client *peerRESTClient) Listen(listenCh chan interface{}, doneCh <-chan st
}
// Trace - send http trace request to peer nodes
func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) {
func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) {
go func() {
for {
client.doTrace(traceCh, doneCh, trcAll, trcErr)
client.doTrace(traceCh, doneCh, traceOpts)
select {
case <-doneCh:
return

@ -17,7 +17,7 @@
package cmd
const (
peerRESTVersion = "v12"
peerRESTVersion = "v13" // Add storage tracing
peerRESTVersionPrefix = SlashSeparator + peerRESTVersion
peerRESTPrefix = minioReservedBucketPath + "/peer"
peerRESTPath = peerRESTPrefix + peerRESTVersionPrefix
@ -62,18 +62,22 @@ const (
)
const (
peerRESTBucket = "bucket"
peerRESTBuckets = "buckets"
peerRESTUser = "user"
peerRESTGroup = "group"
peerRESTUserTemp = "user-temp"
peerRESTPolicy = "policy"
peerRESTUserOrGroup = "user-or-group"
peerRESTIsGroup = "is-group"
peerRESTSignal = "signal"
peerRESTProfiler = "profiler"
peerRESTTraceAll = "all"
peerRESTTraceErr = "err"
peerRESTBucket = "bucket"
peerRESTBuckets = "buckets"
peerRESTUser = "user"
peerRESTGroup = "group"
peerRESTUserTemp = "user-temp"
peerRESTPolicy = "policy"
peerRESTUserOrGroup = "user-or-group"
peerRESTIsGroup = "is-group"
peerRESTSignal = "signal"
peerRESTProfiler = "profiler"
peerRESTTraceErr = "err"
peerRESTTraceInternal = "internal"
peerRESTTraceStorage = "storage"
peerRESTTraceS3 = "s3"
peerRESTTraceOS = "os"
peerRESTTraceThreshold = "threshold"
peerRESTListenBucket = "bucket"
peerRESTListenPrefix = "prefix"

@ -899,14 +899,37 @@ func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) {
}
}
func extractTraceOptsFromPeerRequest(r *http.Request) (opts madmin.ServiceTraceOpts, err error) {
q := r.URL.Query()
opts.OnlyErrors = q.Get(peerRESTTraceErr) == "true"
opts.Storage = q.Get(peerRESTTraceStorage) == "true"
opts.Internal = q.Get(peerRESTTraceInternal) == "true"
opts.S3 = q.Get(peerRESTTraceS3) == "true"
opts.OS = q.Get(peerRESTTraceOS) == "true"
if t := q.Get(peerRESTTraceThreshold); t != "" {
d, err := time.ParseDuration(t)
if err != nil {
return opts, err
}
opts.Threshold = d
}
return
}
// TraceHandler sends http trace messages back to peer rest client
func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
trcAll := r.URL.Query().Get(peerRESTTraceAll) == "true"
trcErr := r.URL.Query().Get(peerRESTTraceErr) == "true"
traceOpts, err := extractTraceOptsFromPeerRequest(r)
if err != nil {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
w.WriteHeader(http.StatusOK)
w.(http.Flusher).Flush()
@ -918,8 +941,8 @@ func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) {
// Use buffered channel to take care of burst sends or slow w.Write()
ch := make(chan interface{}, 2000)
globalHTTPTrace.Subscribe(ch, doneCh, func(entry interface{}) bool {
return mustTrace(entry, trcAll, trcErr)
globalTrace.Subscribe(ch, doneCh, func(entry interface{}) bool {
return mustTrace(entry, traceOpts)
})
keepAliveTicker := time.NewTicker(500 * time.Millisecond)

@ -31,12 +31,12 @@ func _() {
_ = x[storageMetricWriteMetadata-20]
_ = x[storageMetricReadVersion-21]
_ = x[storageMetricReadAll-22]
_ = x[metricLast-23]
_ = x[storageMetricLast-23]
}
const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllmetricLast"
const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllLast"
var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 229}
var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 223}
func (i storageMetric) String() string {
if i >= storageMetric(len(_storageMetric_index)-1) {

@ -77,8 +77,8 @@ func registerWebRouter(router *mux.Router) error {
"bucket": bucketName,
"object": objectName,
})
if globalHTTPTrace.NumSubscribers() > 0 {
globalHTTPTrace.Publish(WebTrace(ri))
if globalTrace.NumSubscribers() > 0 {
globalTrace.Publish(WebTrace(ri))
}
ctx := newContext(ri.Request, ri.ResponseWriter, ri.Method)
logger.AuditLog(ctx, ri.ResponseWriter, ri.Request, claims.Map())

@ -19,11 +19,13 @@ package cmd
import (
"context"
"io"
"strings"
"sync"
"sync/atomic"
"time"
ewma "github.com/VividCortex/ewma"
trace "github.com/minio/minio/pkg/trace"
)
//go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE
@ -57,7 +59,7 @@ const (
// .... add more
metricLast
storageMetricLast
)
// Detects change in underlying disk.
@ -65,8 +67,8 @@ type xlStorageDiskIDCheck struct {
storage StorageAPI
diskID string
apiCalls [metricLast]uint64
apiLatencies [metricLast]ewma.MovingAverage
apiCalls [storageMetricLast]uint64
apiLatencies [storageMetricLast]ewma.MovingAverage
}
func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
@ -222,7 +224,7 @@ func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context) (info DiskInfo, err
}
func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) {
defer p.updateStorageMetrics(storageMetricMakeVolBulk)()
defer p.updateStorageMetrics(storageMetricMakeVolBulk, volumes...)()
select {
case <-ctx.Done():
@ -237,7 +239,7 @@ func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...strin
}
func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) {
defer p.updateStorageMetrics(storageMetricMakeVol)()
defer p.updateStorageMetrics(storageMetricMakeVol, volume)()
select {
case <-ctx.Done():
@ -252,7 +254,7 @@ func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err
}
func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error) {
defer p.updateStorageMetrics(storageMetricListVols)()
defer p.updateStorageMetrics(storageMetricListVols, "/")()
select {
case <-ctx.Done():
@ -267,7 +269,7 @@ func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error)
}
func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
defer p.updateStorageMetrics(storageMetricStatVol)()
defer p.updateStorageMetrics(storageMetricStatVol, volume)()
select {
case <-ctx.Done():
@ -282,7 +284,7 @@ func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol
}
func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
defer p.updateStorageMetrics(storageMetricDeleteVol)()
defer p.updateStorageMetrics(storageMetricDeleteVol, volume)()
select {
case <-ctx.Done():
@ -297,7 +299,7 @@ func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, for
}
func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath string, count int) ([]string, error) {
defer p.updateStorageMetrics(storageMetricListDir)()
defer p.updateStorageMetrics(storageMetricListDir, volume, dirPath)()
select {
case <-ctx.Done():
@ -313,7 +315,7 @@ func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath stri
}
func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) {
defer p.updateStorageMetrics(storageMetricReadFile)()
defer p.updateStorageMetrics(storageMetricReadFile, volume, path)()
select {
case <-ctx.Done():
@ -329,7 +331,7 @@ func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path
}
func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
defer p.updateStorageMetrics(storageMetricAppendFile)()
defer p.updateStorageMetrics(storageMetricAppendFile, volume, path)()
select {
case <-ctx.Done():
@ -345,7 +347,7 @@ func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, pa
}
func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) error {
defer p.updateStorageMetrics(storageMetricCreateFile)()
defer p.updateStorageMetrics(storageMetricCreateFile, volume, path)()
select {
case <-ctx.Done():
@ -361,7 +363,7 @@ func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path stri
}
func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
defer p.updateStorageMetrics(storageMetricReadFileStream)()
defer p.updateStorageMetrics(storageMetricReadFileStream, volume, path)()
select {
case <-ctx.Done():
@ -377,7 +379,7 @@ func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path
}
func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error {
defer p.updateStorageMetrics(storageMetricRenameFile)()
defer p.updateStorageMetrics(storageMetricRenameFile, srcVolume, srcPath, dstVolume, dstPath)()
select {
case <-ctx.Done():
@ -393,7 +395,7 @@ func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPat
}
func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error {
defer p.updateStorageMetrics(storageMetricRenameData)()
defer p.updateStorageMetrics(storageMetricRenameData, srcPath, dataDir, dstVolume, dstPath)()
select {
case <-ctx.Done():
@ -409,7 +411,7 @@ func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPat
}
func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) {
defer p.updateStorageMetrics(storageMetricCheckParts)()
defer p.updateStorageMetrics(storageMetricCheckParts, volume, path)()
select {
case <-ctx.Done():
@ -425,7 +427,7 @@ func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, pa
}
func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, path string) (err error) {
defer p.updateStorageMetrics(storageMetricCheckFile)()
defer p.updateStorageMetrics(storageMetricCheckFile, volume, path)()
select {
case <-ctx.Done():
@ -441,7 +443,7 @@ func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, pat
}
func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
defer p.updateStorageMetrics(storageMetricDelete)()
defer p.updateStorageMetrics(storageMetricDelete, volume, path)()
select {
case <-ctx.Done():
@ -456,12 +458,21 @@ func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path s
return p.storage.Delete(ctx, volume, path, recursive)
}
// DeleteVersions deletes slice of versions, it can be same object
// or multiple objects.
func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) {
defer p.updateStorageMetrics(storageMetricDeleteVersions)()
// Merely for tracing storage
path := ""
if len(versions) > 0 {
path = versions[0].Name
}
defer p.updateStorageMetrics(storageMetricDeleteVersions, volume, path)()
errs = make([]error, len(versions))
select {
case <-ctx.Done():
errs = make([]error, len(versions))
for i := range errs {
errs[i] = ctx.Err()
}
@ -470,17 +481,17 @@ func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string
}
if err := p.checkDiskStale(); err != nil {
errs = make([]error, len(versions))
for i := range errs {
errs[i] = err
}
return errs
}
return p.storage.DeleteVersions(ctx, volume, versions)
}
func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error {
defer p.updateStorageMetrics(storageMetricVerifyFile)()
defer p.updateStorageMetrics(storageMetricVerifyFile, volume, path)()
select {
case <-ctx.Done():
@ -496,7 +507,7 @@ func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path stri
}
func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
defer p.updateStorageMetrics(storageMetricWriteAll)()
defer p.updateStorageMetrics(storageMetricWriteAll, volume, path)()
select {
case <-ctx.Done():
@ -512,7 +523,7 @@ func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path
}
func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool) (err error) {
defer p.updateStorageMetrics(storageMetricDeleteVersion)()
defer p.updateStorageMetrics(storageMetricDeleteVersion, volume, path)()
select {
case <-ctx.Done():
@ -528,7 +539,7 @@ func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path s
}
func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) (err error) {
defer p.updateStorageMetrics(storageMetricWriteMetadata)()
defer p.updateStorageMetrics(storageMetricWriteMetadata, volume, path)()
select {
case <-ctx.Done():
@ -544,7 +555,7 @@ func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path s
}
func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (fi FileInfo, err error) {
defer p.updateStorageMetrics(storageMetricReadVersion)()
defer p.updateStorageMetrics(storageMetricReadVersion, volume, path)()
select {
case <-ctx.Done():
@ -560,7 +571,7 @@ func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, ve
}
func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
defer p.updateStorageMetrics(storageMetricReadAll)()
defer p.updateStorageMetrics(storageMetricReadAll, volume, path)()
select {
case <-ctx.Done():
@ -575,11 +586,31 @@ func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path
return p.storage.ReadAll(ctx, volume, path)
}
// Update storage metrics
func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric) func() {
startTime := time.Now()
return func() {
atomic.AddUint64(&p.apiCalls[s], 1)
p.apiLatencies[s].Add(float64(time.Since(startTime)))
func storageTrace(s storageMetric, startTime time.Time, duration time.Duration, path string) trace.Info {
return trace.Info{
TraceType: trace.Storage,
Time: startTime,
NodeName: globalLocalNodeName,
FuncName: s.String(),
StorageStats: trace.StorageStats{
Duration: duration,
Path: path,
},
}
}
// Update storage metrics
func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...string) func() {
startTime := time.Now()
trace := globalTrace.NumSubscribers() > 0
return func() {
duration := time.Since(startTime)
atomic.AddUint64(&p.apiCalls[s], 1)
p.apiLatencies[s].Add(float64(duration))
if trace {
globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " ")))
}
}
}

@ -37,14 +37,15 @@ func main() {
if err != nil {
log.Fatalln(err)
}
doneCh := make(chan struct{})
defer close(doneCh)
// Start listening on all http trace activity from all servers
// in the minio cluster.
allTrace := false
errTrace := false
traceCh := madmClnt.ServiceTrace(context.Background(), allTrace, errTrace, doneCh)
// Start listening on all http trace activity from all servers in the minio cluster.
traceCh := madmClnt.ServiceTrace(context.Background(), madmin.ServiceTraceOpts{
S3: true,
Internal: true,
Storage: true,
OS: true,
Threshold: 0,
})
for traceInfo := range traceCh {
if traceInfo.Err != nil {
fmt.Println(traceInfo.Err)

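A hedged variant of the example above (editor's; also needs the "time" import): restrict the stream to S3 calls that took longer than 100ms and ended in an error status.

traceCh := madmClnt.ServiceTrace(context.Background(), madmin.ServiceTraceOpts{
	S3:         true,
	OnlyErrors: true,                   // keep only responses with status >= 400
	Threshold:  100 * time.Millisecond, // drop calls faster than this
})
for traceInfo := range traceCh {
	fmt.Printf("%+v\n", traceInfo)
}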
@ -23,6 +23,7 @@ import (
"net/http"
"net/url"
"strconv"
"time"
trace "github.com/minio/minio/pkg/trace"
)
@ -77,16 +78,30 @@ type ServiceTraceInfo struct {
Err error `json:"-"`
}
// ServiceTraceOpts holds tracing options
type ServiceTraceOpts struct {
S3 bool
Internal bool
Storage bool
OS bool
OnlyErrors bool
Threshold time.Duration
}
// ServiceTrace - listen on http trace notifications.
func (adm AdminClient) ServiceTrace(ctx context.Context, allTrace, errTrace bool) <-chan ServiceTraceInfo {
func (adm AdminClient) ServiceTrace(ctx context.Context, opts ServiceTraceOpts) <-chan ServiceTraceInfo {
traceInfoCh := make(chan ServiceTraceInfo)
// Only success, start a routine to start reading line by line.
go func(traceInfoCh chan<- ServiceTraceInfo) {
defer close(traceInfoCh)
for {
urlValues := make(url.Values)
urlValues.Set("all", strconv.FormatBool(allTrace))
urlValues.Set("err", strconv.FormatBool(errTrace))
urlValues.Set("err", strconv.FormatBool(opts.OnlyErrors))
urlValues.Set("s3", strconv.FormatBool(opts.S3))
urlValues.Set("internal", strconv.FormatBool(opts.Internal))
urlValues.Set("storage", strconv.FormatBool(opts.Storage))
urlValues.Set("os", strconv.FormatBool(opts.OS))
urlValues.Set("threshold", opts.Threshold.String())
reqData := requestData{
relPath: adminAPIPrefix + "/trace",
queryValues: urlValues,

@ -21,14 +21,45 @@ import (
"time"
)
// Type indicates the type of the tracing Info
type Type int
const (
// OS tracing (Golang os package calls)
OS Type = iota
// Storage tracing (MinIO Storage Layer)
Storage
// HTTP tracing (MinIO S3 & Internode)
HTTP
)
// Info - represents a trace record; it additionally
// reports errors, if any, while listening on trace.
type Info struct {
NodeName string `json:"nodename"`
FuncName string `json:"funcname"`
TraceType Type `json:"type"`
NodeName string `json:"nodename"`
FuncName string `json:"funcname"`
Time time.Time `json:"time"`
ReqInfo RequestInfo `json:"request"`
RespInfo ResponseInfo `json:"response"`
CallStats CallStats `json:"stats"`
StorageStats StorageStats `json:"storageStats"`
OSStats OSStats `json:"osStats"`
}
// StorageStats statistics on MinIO Storage layer calls
type StorageStats struct {
Path string `json:"path"`
Duration time.Duration `json:"duration"`
}
// OSStats statistics on operating system specific calls.
type OSStats struct {
Path string `json:"path"`
Duration time.Duration `json:"duration"`
}
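A small consumer-side sketch (editor's; would live in this package and needs an "fmt" import) showing how the new TraceType selects the matching stats block — the output format is illustrative, not what any MinIO tool renders:

// summarize renders a one-line view of a trace record per trace type.
func summarize(t Info) string {
	switch t.TraceType {
	case HTTP:
		return fmt.Sprintf("[HTTP] %s %s %s took %v",
			t.NodeName, t.FuncName, t.ReqInfo.Path, t.CallStats.Latency)
	case Storage:
		return fmt.Sprintf("[STORAGE] %s %s %s took %v",
			t.NodeName, t.FuncName, t.StorageStats.Path, t.StorageStats.Duration)
	case OS:
		return fmt.Sprintf("[OS] %s %s %s took %v",
			t.NodeName, t.FuncName, t.OSStats.Path, t.OSStats.Duration)
	default:
		return fmt.Sprintf("[UNKNOWN] %s %s", t.NodeName, t.FuncName)
	}
}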
// CallStats records request stats