minio/pkg/etag/reader.go

155 lines
3.9 KiB
Go
Raw Normal View History

// Copyright (c) 2015-2021 MinIO, Inc.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This file is part of MinIO Object Storage stack
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
package etag
import (
"crypto/md5"
"fmt"
"hash"
"io"
)
// Tagger is the interface that wraps the basic ETag method.
type Tagger interface {
ETag() ETag
}
type wrapReader struct {
io.Reader
Tagger
}
// ETag returns the ETag of the underlying Tagger.
func (r *wrapReader) ETag() ETag {
if r.Tagger == nil {
return nil
}
return r.Tagger.ETag()
}
// Wrap returns an io.Reader that reads from the wrapped
// io.Reader and implements the Tagger interaface.
//
// If content implements Tagger then the returned Reader
// returns ETag of the content. Otherwise, it returns
// nil as ETag.
//
// Wrap provides an adapter for io.Reader implemetations
// that don't implement the Tagger interface.
// It is mainly used to provide a high-level io.Reader
// access to the ETag computed by a low-level io.Reader:
//
// content := etag.NewReader(r.Body, nil)
//
// compressedContent := Compress(content)
// encryptedContent := Encrypt(compressedContent)
//
// // Now, we need an io.Reader that can access
// // the ETag computed over the content.
// reader := etag.Wrap(encryptedContent, content)
//
func Wrap(wrapped, content io.Reader) io.Reader {
if t, ok := content.(Tagger); ok {
return wrapReader{
Reader: wrapped,
Tagger: t,
}
}
return wrapReader{
Reader: wrapped,
}
}
// A Reader wraps an io.Reader and computes the
// MD5 checksum of the read content as ETag.
//
// Optionally, a Reader can also verify that
// the computed ETag matches an expected value.
// Therefore, it compares both ETags once the
// underlying io.Reader returns io.EOF.
// If the computed ETag does not match the
// expected ETag then Read returns a VerifyError.
//
// Reader implements the Tagger interface.
type Reader struct {
src io.Reader
md5 hash.Hash
checksum ETag
readN int64
}
// NewReader returns a new Reader that computes the
// MD5 checksum of the content read from r as ETag.
//
// If the provided etag is not nil the returned
// Reader compares the etag with the computed
// MD5 sum once the r returns io.EOF.
func NewReader(r io.Reader, etag ETag) *Reader {
if er, ok := r.(*Reader); ok {
if er.readN == 0 && Equal(etag, er.checksum) {
return er
}
}
return &Reader{
src: r,
md5: md5.New(),
checksum: etag,
}
}
// Read reads up to len(p) bytes from the underlying
// io.Reader as specified by the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
n, err := r.src.Read(p)
r.readN += int64(n)
r.md5.Write(p[:n])
if err == io.EOF && len(r.checksum) != 0 {
if etag := r.ETag(); !Equal(etag, r.checksum) {
return n, VerifyError{
Expected: r.checksum,
Computed: etag,
}
}
}
return n, err
}
// ETag returns the ETag of all the content read
// so far. Reading more content changes the MD5
// checksum. Therefore, calling ETag multiple
// times may return different results.
func (r *Reader) ETag() ETag {
sum := r.md5.Sum(nil)
return ETag(sum)
}
// VerifyError is an error signaling that a
// computed ETag does not match an expected
// ETag.
type VerifyError struct {
Expected ETag
Computed ETag
}
func (v VerifyError) Error() string {
return fmt.Sprintf("etag: expected ETag %q does not match computed ETag %q", v.Expected, v.Computed)
}