minio/pkg/etag/reader.go
Andreas Auernhammer d4b822d697
pkg/etag: add new package for S3 ETag handling (#11577)
This commit adds a new package `etag` for dealing
with S3 ETags.

Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.

Even worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.

Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.

One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.

This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
   reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.

The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 12:31:53 -08:00

152 lines
3.8 KiB
Go

// MinIO Cloud Storage, (C) 2021 MinIO, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etag
import (
"crypto/md5"
"fmt"
"hash"
"io"
)
// Tagger is the interface that wraps the basic ETag method.
//
// It lets code holding only an io.Reader discover, via a type
// assertion, whether that reader can report an ETag. In this file
// *Reader implements it by returning the MD5 of the content read
// so far.
type Tagger interface {
// ETag returns the ETag currently known to the implementation.
ETag() ETag
}
// wrapReader pairs an io.Reader with an optional Tagger so that an
// ETag computed elsewhere can be retrieved from the reader that is
// actually handed to callers.
//
// Read is promoted from the embedded io.Reader. ETag is declared
// explicitly (instead of relying on promotion from the embedded
// Tagger) so that a missing Tagger yields a nil ETag rather than a
// nil-interface method call.
type wrapReader struct {
	io.Reader
	Tagger
}

// Compile-time check that a plain wrapReader value implements Tagger.
var _ Tagger = wrapReader{}

// ETag returns the ETag of the underlying Tagger, or nil when no
// Tagger is set.
//
// The receiver is a value on purpose: the explicitly declared method
// shadows the one promoted from the embedded Tagger, so with a pointer
// receiver only *wrapReader - and not a wrapReader value stored in an
// io.Reader - would satisfy Tagger. A value receiver makes both forms
// implement the interface.
func (r wrapReader) ETag() ETag {
	if r.Tagger == nil {
		return nil
	}
	return r.Tagger.ETag()
}
// Wrap returns an io.Reader that reads from the wrapped
// io.Reader and implements the Tagger interface.
//
// If content implements Tagger then the returned Reader
// returns the ETag of the content. Otherwise, it returns
// nil as ETag.
//
// Wrap provides an adapter for io.Reader implementations
// that don't implement the Tagger interface.
// It is mainly used to provide a high-level io.Reader
// access to the ETag computed by a low-level io.Reader:
//
//	content := etag.NewReader(r.Body, nil)
//
//	compressedContent := Compress(content)
//	encryptedContent := Encrypt(compressedContent)
//
//	// Now, we need an io.Reader that can access
//	// the ETag computed over the content.
//	reader := etag.Wrap(encryptedContent, content)
func Wrap(wrapped, content io.Reader) io.Reader {
	// A failed assertion leaves tagger as a nil interface, which is
	// exactly the "no ETag available" state of wrapReader.
	tagger, _ := content.(Tagger)

	// Return a pointer: *wrapReader has ETag in its method set no matter
	// whether the method is declared on the value or the pointer
	// receiver, so the result can always be type-asserted to Tagger.
	return &wrapReader{
		Reader: wrapped,
		Tagger: tagger,
	}
}
// A Reader wraps an io.Reader and computes the
// MD5 checksum of the read content as ETag.
//
// Optionally, a Reader can also verify that
// the computed ETag matches an expected value.
// To do so, it compares both ETags once the
// underlying io.Reader returns io.EOF.
// If the computed ETag does not match the
// expected ETag then Read returns a VerifyError.
//
// Reader implements the Tagger interface.
type Reader struct {
// src is the underlying reader all content is read from.
src io.Reader
// md5 accumulates the hash of every byte returned by Read.
md5 hash.Hash
// checksum is the expected ETag; when it is empty no
// verification happens at io.EOF.
checksum ETag
// readN counts the bytes read so far. NewReader only re-uses
// an existing Reader while this is still zero.
readN int64
}
// NewReader returns a Reader that computes the MD5 checksum of
// everything read from r and exposes it as ETag.
//
// When etag is not nil, the returned Reader compares it with the
// computed MD5 sum as soon as r reports io.EOF.
//
// If r already is a *Reader that has not been read from yet and that
// expects the same etag, r itself is returned instead of stacking a
// second hashing layer on top of it.
func NewReader(r io.Reader, etag ETag) *Reader {
	existing, isReader := r.(*Reader)
	if isReader && existing.readN == 0 && Equal(etag, existing.checksum) {
		return existing
	}

	fresh := new(Reader)
	fresh.src = r
	fresh.md5 = md5.New()
	fresh.checksum = etag
	return fresh
}
// Read implements io.Reader. It forwards the call to the underlying
// reader, adds the returned bytes to the running MD5 and keeps track
// of the total number of bytes read.
//
// Once the underlying reader reports io.EOF and an expected ETag was
// configured, the computed ETag is compared with it; on mismatch the
// io.EOF is replaced by a VerifyError.
func (r *Reader) Read(p []byte) (int, error) {
	n, err := r.src.Read(p)
	r.readN += int64(n)
	r.md5.Write(p[:n])

	// Nothing to verify before the end of the stream or without
	// an expected checksum.
	if err != io.EOF || len(r.checksum) == 0 {
		return n, err
	}

	computed := r.ETag()
	if Equal(computed, r.checksum) {
		return n, err
	}
	return n, VerifyError{
		Expected: r.checksum,
		Computed: computed,
	}
}
// ETag reports the MD5 checksum of the content read up to now as
// an ETag. The value is a snapshot: it changes whenever more content
// is read, so successive calls may return different results.
func (r *Reader) ETag() ETag {
	return ETag(r.md5.Sum(nil))
}
// VerifyError reports that the ETag computed over some content
// differs from the ETag that was expected for it.
type VerifyError struct {
	Expected ETag // the ETag the content was supposed to have
	Computed ETag // the ETag actually computed over the content
}

// Error implements the error interface. Both ETags are rendered
// with the %q verb.
func (v VerifyError) Error() string {
	const format = "etag: expected ETag %q does not match computed ETag %q"
	return fmt.Sprintf(format, v.Expected, v.Computed)
}