diff --git a/cmd/common-main.go b/cmd/common-main.go index d2fb7dd23..79f4e8468 100644 --- a/cmd/common-main.go +++ b/cmd/common-main.go @@ -359,6 +359,14 @@ func handleCommonEnvVars() { globalCacheMaxUse = maxUse } } + + var err error + if cacheEncKey := os.Getenv("MINIO_CACHE_ENCRYPTION_MASTER_KEY"); cacheEncKey != "" { + globalCacheKMSKeyID, globalCacheKMS, err = parseKMSMasterKey(cacheEncKey) + if err != nil { + logger.Fatal(uiErrInvalidCacheEncryptionKey(err), "Invalid cache encryption master key") + } + } // In place update is true by default if the MINIO_UPDATE is not set // or is not set to 'off', if MINIO_UPDATE is set to 'off' then // in-place update is off. diff --git a/cmd/disk-cache-backend.go b/cmd/disk-cache-backend.go index 96f6a0597..20495f4b3 100644 --- a/cmd/disk-cache-backend.go +++ b/cmd/disk-cache-backend.go @@ -19,6 +19,7 @@ package cmd import ( "bytes" "context" + "crypto/rand" "encoding/hex" "encoding/json" "fmt" @@ -33,8 +34,10 @@ import ( "time" "github.com/djherbis/atime" + "github.com/minio/minio/cmd/crypto" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/disk" + "github.com/minio/sio" "github.com/ncw/directio" ) @@ -45,6 +48,10 @@ const ( cacheMetaVersion = "1.0.0" cacheEnvDelimiter = ";" + + // SSECacheEncrypted is the metadata key indicating that the object + // is a cache entry encrypted with cache KMS master key in globalCacheKMS. + SSECacheEncrypted = "X-Minio-Internal-Encrypted-Cache" ) // CacheChecksumInfoV1 - carries checksums of individual blocks on disk. 
@@ -223,6 +230,7 @@ func (c *diskCache) purge() { continue } cc := cacheControlOpts(objInfo) + if atime.Get(fi).Before(expiry) || cc.isStale(objInfo.ModTime) { if err = removeAll(pathJoin(c.dir, obj.Name())); err != nil { @@ -273,6 +281,10 @@ func (c *diskCache) Stat(ctx context.Context, bucket, object string) (oi ObjectI } oi.Bucket = bucket oi.Name = object + + if err = decryptCacheObjectETag(&oi); err != nil { + return oi, err + } return } @@ -323,7 +335,24 @@ func (c *diskCache) saveMetadata(ctx context.Context, bucket, object string, met // Backend metadata could have changed through server side copy - reset cache metadata if that is the case func (c *diskCache) updateMetadataIfChanged(ctx context.Context, bucket, object string, bkObjectInfo, cacheObjInfo ObjectInfo) error { - if !reflect.DeepEqual(bkObjectInfo.UserDefined, cacheObjInfo.UserDefined) || + + bkMeta := make(map[string]string) + cacheMeta := make(map[string]string) + for k, v := range bkObjectInfo.UserDefined { + if hasPrefix(k, ReservedMetadataPrefix) { + // Do not need to send any internal metadata + continue + } + bkMeta[http.CanonicalHeaderKey(k)] = v + } + for k, v := range cacheObjInfo.UserDefined { + if hasPrefix(k, ReservedMetadataPrefix) { + // Do not need to send any internal metadata + continue + } + cacheMeta[http.CanonicalHeaderKey(k)] = v + } + if !reflect.DeepEqual(bkMeta, cacheMeta) || bkObjectInfo.ETag != cacheObjInfo.ETag || bkObjectInfo.ContentType != cacheObjInfo.ContentType || bkObjectInfo.Expires != cacheObjInfo.Expires { @@ -337,11 +366,11 @@ func getCacheSHADir(dir, bucket, object string) string { } // Cache data to disk with bitrot checksum added for each block of 1MB -func (c *diskCache) bitrotWriteToCache(ctx context.Context, cachePath string, reader io.Reader, size int64) (int64, error) { +func (c *diskCache) bitrotWriteToCache(ctx context.Context, cachePath string, reader io.Reader, size uint64) (int64, error) { if err := os.MkdirAll(cachePath, 0777); err != nil { 
return 0, err } - bufSize := int64(readSizeV1) + bufSize := uint64(readSizeV1) if size > 0 && bufSize > size { bufSize = size } @@ -396,6 +425,39 @@ func (c *diskCache) bitrotWriteToCache(ctx context.Context, cachePath string, re return bytesWritten, nil } +func newCacheEncryptReader(content io.Reader, bucket, object string, metadata map[string]string) (r io.Reader, err error) { + objectEncryptionKey, err := newCacheEncryptMetadata(bucket, object, metadata) + if err != nil { + return nil, err + } + + reader, err := sio.EncryptReader(content, sio.Config{Key: objectEncryptionKey[:], MinVersion: sio.Version20}) + if err != nil { + return nil, crypto.ErrInvalidCustomerKey + } + return reader, nil +} +func newCacheEncryptMetadata(bucket, object string, metadata map[string]string) ([]byte, error) { + var sealedKey crypto.SealedKey + if globalCacheKMS == nil { + return nil, errKMSNotConfigured + } + key, encKey, err := globalCacheKMS.GenerateKey(globalCacheKMSKeyID, crypto.Context{bucket: path.Join(bucket, object)}) + if err != nil { + return nil, err + } + + objectKey := crypto.GenerateKey(key, rand.Reader) + sealedKey = objectKey.Seal(key, crypto.GenerateIV(rand.Reader), crypto.S3.String(), bucket, object) + crypto.S3.CreateMetadata(metadata, globalCacheKMSKeyID, encKey, sealedKey) + + if etag, ok := metadata["etag"]; ok { + metadata["etag"] = hex.EncodeToString(objectKey.SealETag([]byte(etag))) + } + metadata[SSECacheEncrypted] = "" + return objectKey[:], nil +} + // Caches the object to disk func (c *diskCache) Put(ctx context.Context, bucket, object string, data io.Reader, size int64, opts ObjectOptions) error { if c.diskUsageHigh() { @@ -417,14 +479,29 @@ func (c *diskCache) Put(ctx context.Context, bucket, object string, data io.Read bufSize = size } - n, err := c.bitrotWriteToCache(ctx, cachePath, data, size) + var metadata = make(map[string]string) + for k, v := range opts.UserDefined { + metadata[k] = v + } + var reader = data + var actualSize = uint64(size) + 
var err error + if globalCacheKMS != nil { + reader, err = newCacheEncryptReader(data, bucket, object, metadata) + if err != nil { + return err + } + actualSize, _ = sio.EncryptedSize(uint64(size)) + } + n, err := c.bitrotWriteToCache(ctx, cachePath, reader, actualSize) if IsErr(err, baseErrs...) { c.setOnline(false) } if err != nil { return err } - return c.saveMetadata(ctx, bucket, object, opts.UserDefined, n) + + return c.saveMetadata(ctx, bucket, object, metadata, n) } // checks streaming bitrot checksum of cached object before returning data @@ -484,7 +561,6 @@ func (c *diskCache) bitrotReadFromCache(ctx context.Context, filePath string, of if _, err := io.ReadFull(rc, checksumHash); err != nil { return err } - h.Reset() n, err := io.ReadFull(rc, *bufp) if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF { @@ -527,7 +603,7 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang var objInfo ObjectInfo cacheObjPath := getCacheSHADir(c.dir, bucket, object) - if objInfo, err = c.statCache(ctx, cacheObjPath); err != nil { + if objInfo, err = c.Stat(ctx, bucket, object); err != nil { return nil, toObjectErr(err, bucket, object) } @@ -552,7 +628,6 @@ func (c *diskCache) Get(ctx context.Context, bucket, object string, rs *HTTPRang // Cleanup function to cause the go routine above to exit, in // case of incomplete read. pipeCloser := func() { pr.Close() } - return fn(pr, h, opts.CheckCopyPrecondFn, pipeCloser) } diff --git a/cmd/disk-cache-utils.go b/cmd/disk-cache-utils.go index 30481be8d..5713b40aa 100644 --- a/cmd/disk-cache-utils.go +++ b/cmd/disk-cache-utils.go @@ -17,8 +17,10 @@ package cmd import ( + "encoding/hex" "io" "os" + "path" "strconv" "strings" "time" @@ -124,7 +126,7 @@ func backendDownError(err error) bool { // IsCacheable returns if the object should be saved in the cache. 
func (o ObjectInfo) IsCacheable() bool { - return !crypto.IsEncrypted(o.UserDefined) + return !crypto.IsEncrypted(o.UserDefined) || globalCacheKMS != nil } // reads file cached on disk from offset upto length @@ -147,6 +149,10 @@ func readCacheFileStream(filePath string, offset, length int64) (io.ReadCloser, return nil, err } + if err = os.Chtimes(filePath, time.Now(), st.ModTime()); err != nil { + return nil, err + } + // Verify if its not a regular file, since subsequent Seek is undefined. if !st.Mode().IsRegular() { return nil, errIsNotRegular @@ -168,3 +174,47 @@ func readCacheFileStream(filePath string, offset, length int64) (io.ReadCloser, io.Closer }{Reader: io.LimitReader(fr, length), Closer: fr}, nil } + +func isCacheEncrypted(meta map[string]string) bool { + _, ok := meta[SSECacheEncrypted] + return ok +} + +// decryptCacheObjectETag tries to decrypt the ETag saved in encrypted format using the cache KMS +func decryptCacheObjectETag(info *ObjectInfo) error { + // Directories are never encrypted. 
+ if info.IsDir { + return nil + } + encrypted := crypto.S3.IsEncrypted(info.UserDefined) && isCacheEncrypted(info.UserDefined) + + switch { + case encrypted: + if globalCacheKMS == nil { + return errKMSNotConfigured + } + keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(info.UserDefined) + + if err != nil { + return err + } + extKey, err := globalCacheKMS.UnsealKey(keyID, kmsKey, crypto.Context{info.Bucket: path.Join(info.Bucket, info.Name)}) + if err != nil { + return err + } + var objectKey crypto.ObjectKey + if err = objectKey.Unseal(extKey, sealedKey, crypto.S3.String(), info.Bucket, info.Name); err != nil { + return err + } + etagStr := tryDecryptETag(objectKey[:], info.ETag, false) + // backend ETag was hex encoded before encrypting, so hex decode to get actual ETag + etag, err := hex.DecodeString(etagStr) + if err != nil { + return err + } + info.ETag = string(etag) + return nil + } + + return nil +} diff --git a/cmd/disk-cache.go b/cmd/disk-cache.go index 2b226fc5a..ec5a24c9b 100644 --- a/cmd/disk-cache.go +++ b/cmd/disk-cache.go @@ -263,7 +263,6 @@ func (c *cacheObjects) GetObjectInfo(ctx context.Context, bucket, object string, return cachedObjInfo, nil } } - objInfo, err := getObjectInfoFn(ctx, bucket, object, opts) if err != nil { if _, ok := err.(ObjectNotFound); ok { @@ -446,6 +445,7 @@ func checkAtimeSupport(dir string) (err error) { } func (c *cacheObjects) migrateCacheFromV1toV2(ctx context.Context) { logger.StartupMessage(colorBlue("Cache migration initiated ....")) + var wg = &sync.WaitGroup{} errs := make([]error, len(c.cache)) for i, dc := range c.cache { diff --git a/cmd/encryption-v1.go b/cmd/encryption-v1.go index 6f6a64634..fd9c6752d 100644 --- a/cmd/encryption-v1.go +++ b/cmd/encryption-v1.go @@ -279,6 +279,23 @@ func decryptObjectInfo(key []byte, bucket, object string, metadata map[string]st switch { default: return nil, errObjectTampered + case crypto.S3.IsEncrypted(metadata) && isCacheEncrypted(metadata): + if globalCacheKMS == 
nil { + return nil, errKMSNotConfigured + } + keyID, kmsKey, sealedKey, err := crypto.S3.ParseMetadata(metadata) + if err != nil { + return nil, err + } + extKey, err := globalCacheKMS.UnsealKey(keyID, kmsKey, crypto.Context{bucket: path.Join(bucket, object)}) + if err != nil { + return nil, err + } + var objectKey crypto.ObjectKey + if err = objectKey.Unseal(extKey, sealedKey, crypto.S3.String(), bucket, object); err != nil { + return nil, err + } + return objectKey[:], nil case crypto.S3.IsEncrypted(metadata): if GlobalKMS == nil { return nil, errKMSNotConfigured diff --git a/cmd/format-disk-cache.go b/cmd/format-disk-cache.go index b1bfff6f4..07871a53f 100644 --- a/cmd/format-disk-cache.go +++ b/cmd/format-disk-cache.go @@ -30,6 +30,7 @@ import ( "strings" "github.com/minio/minio/cmd/logger" + "github.com/minio/sio" ) const ( @@ -362,7 +363,7 @@ func loadAndValidateCacheFormat(ctx context.Context, drives []string) (formats [ // reads cached object on disk and writes it back after adding bitrot // hashsum per block as per the new disk cache format. 
-func migrateData(ctx context.Context, c *diskCache, oldfile, destDir string) error { +func migrateCacheData(ctx context.Context, c *diskCache, bucket, object, oldfile, destDir string, metadata map[string]string) error { st, err := os.Stat(oldfile) if err != nil { err = osErrToFSFileErr(err) @@ -372,7 +373,18 @@ func migrateData(ctx context.Context, c *diskCache, oldfile, destDir string) err if err != nil { return err } - _, err = c.bitrotWriteToCache(ctx, destDir, readCloser, st.Size()) + var reader io.Reader = readCloser + + actualSize := uint64(st.Size()) + if globalCacheKMS != nil { + reader, err = newCacheEncryptReader(readCloser, bucket, object, metadata) + if err != nil { + return err + } + actualSize, _ = sio.EncryptedSize(uint64(st.Size())) + } + + _, err = c.bitrotWriteToCache(ctx, destDir, reader, uint64(actualSize)) return err } @@ -401,6 +413,7 @@ func migrateOldCache(ctx context.Context, c *diskCache) error { } for _, bucket := range buckets { + bucket = strings.TrimSuffix(bucket, SlashSeparator) var objMetaPaths []string root := path.Join(oldCacheBucketsPath, bucket) err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error { @@ -422,10 +435,23 @@ func migrateOldCache(ctx context.Context, c *diskCache) error { return err } prevCachedPath := path.Join(c.dir, bucket, object) + + // get old cached metadata + oldMetaPath := pathJoin(oldCacheBucketsPath, bucket, object, cacheMetaJSONFile) + metaPath := pathJoin(destdir, cacheMetaJSONFile) + metaBytes, err := ioutil.ReadFile(oldMetaPath) + if err != nil { + return err + } + // marshal cache metadata after adding version and stat info + meta := &cacheMeta{} + if err = json.Unmarshal(metaBytes, &meta); err != nil { + return err + } // move cached object to new cache directory path // migrate cache data and add bit-rot protection hash sum // at the start of each block - if err := migrateData(ctx, c, prevCachedPath, destdir); err != nil { + if err := migrateCacheData(ctx, c, bucket, 
object, prevCachedPath, destdir, meta.Meta); err != nil { continue } stat, err := os.Stat(prevCachedPath) @@ -440,19 +466,7 @@ func migrateOldCache(ctx context.Context, c *diskCache) error { if err := os.Remove(prevCachedPath); err != nil { return err } - // move cached metadata after changing cache metadata version - oldMetaPath := pathJoin(oldCacheBucketsPath, bucket, object, cacheMetaJSONFile) - metaPath := pathJoin(destdir, cacheMetaJSONFile) - metaBytes, err := ioutil.ReadFile(oldMetaPath) - if err != nil { - return err - } - // marshal cache metadata after adding version and stat info - meta := &cacheMeta{} - if err = json.Unmarshal(metaBytes, &meta); err != nil { - return err - } meta.Checksum = CacheChecksumInfoV1{Algorithm: HighwayHash256S.String(), Blocksize: cacheBlkSize} meta.Version = cacheMetaVersion meta.Stat.Size = stat.Size() diff --git a/cmd/globals.go b/cmd/globals.go index b5d8636fb..a4dbfb94a 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -221,7 +221,10 @@ var ( globalCacheExpiry = 90 // Max allowed disk cache percentage globalCacheMaxUse = 80 - + // Disk cache KMS Key + globalCacheKMSKeyID string + // Initialized KMS configuration for disk cache + globalCacheKMS crypto.KMS // Allocated etcd endpoint for config and bucket DNS. 
globalEtcdClient *etcd.Client diff --git a/cmd/ui-errors.go b/cmd/ui-errors.go index 4758fbbfe..87296d2dd 100644 --- a/cmd/ui-errors.go +++ b/cmd/ui-errors.go @@ -71,6 +71,12 @@ var ( "MINIO_CACHE_MAXUSE: Valid cache max-use value between 0-100", ) + uiErrInvalidCacheEncryptionKey = newUIErrFn( + "Invalid cache encryption master key value", + "Please check the passed value", + "MINIO_CACHE_ENCRYPTION_MASTER_KEY: For more information, please refer to https://docs.min.io/docs/minio-disk-cache-guide", + ) + uiErrInvalidCredentials = newUIErrFn( "Invalid credentials", "Please provide correct credentials", diff --git a/docs/disk-caching/DESIGN.md b/docs/disk-caching/DESIGN.md index 89dd0fd61..feac874a2 100644 --- a/docs/disk-caching/DESIGN.md +++ b/docs/disk-caching/DESIGN.md @@ -40,7 +40,12 @@ Disk caching caches objects for **downloaded** objects i.e - Bitrot protection is added to cached content and verified when object is served from cache. - When an object is deleted, corresponding entry in cache if any is deleted as well. - Cache continues to work for read-only operations such as GET, HEAD when backend is offline. -- Cache-Control and Expires headers can be used to control how long objects stay in the cache +- Cache-Control and Expires headers can be used to control how long objects stay in the cache. ETag of cached objects are not validated with backend until expiry time as per the Cache-Control or Expires header is met. +- To ensure security guarantees, encrypted objects are normally not cached. However, if you wish to encrypt cached content on disk, you can set MINIO_CACHE_ENCRYPTION_MASTER_KEY environment variable to set a cache KMS +master key to automatically encrypt all cached content. + + Note that cache KMS master key is not recommended for use in production deployments. If the MinIO server/gateway machine is ever compromised, the cache KMS master key must also be treated as compromised. 
+ Support for external KMS to manage cache KMS keys is on the roadmap, and would be ideal for production use cases. > NOTE: Expiration happens automatically based on the configured interval as explained above, frequently accessed objects stay alive in cache for a significantly longer time. @@ -51,5 +56,3 @@ Upon restart of minio server after a running minio process is killed or crashes, - Bucket policies are not cached, so anonymous operations are not supported when backend is offline. - Objects are distributed using deterministic hashing among the list of configured cache drives. If one or more drives go offline, or cache drive configuration is altered in any way, performance may degrade to a linear lookup time depending on the number of disks in cache. -## TODO -- Encrypt cached objects automatically with a cache encryption master key \ No newline at end of file