2021-04-18 21:41:13 +02:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
pkg/etag: add new package for S3 ETag handling (#11577)
This commit adds a new package `etag` for dealing
with S3 ETags.
Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.
Even worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.
Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.
One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.
This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.
The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
|
|
|
//
|
2021-04-18 21:41:13 +02:00
|
|
|
// This file is part of MinIO Object Storage stack
|
pkg/etag: add new package for S3 ETag handling (#11577)
This commit adds a new package `etag` for dealing
with S3 ETags.
Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.
Even worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.
Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.
One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.
This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.
The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
|
|
|
//
|
2021-04-18 21:41:13 +02:00
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
pkg/etag: add new package for S3 ETag handling (#11577)
This commit adds a new package `etag` for dealing
with S3 ETags.
Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.
Even worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.
Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.
One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.
This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.
The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
|
|
|
//
|
2021-04-18 21:41:13 +02:00
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
pkg/etag: add new package for S3 ETag handling (#11577)
This commit adds a new package `etag` for dealing
with S3 ETags.
Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.
Even worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.
Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.
One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.
This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.
The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
|
|
|
|
|
|
|
package etag
|
|
|
|
|
|
|
|
import (
|
|
|
|
"crypto/md5"
|
|
|
|
"fmt"
|
|
|
|
"hash"
|
|
|
|
"io"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Tagger is the interface that wraps the basic ETag method.
|
|
|
|
type Tagger interface {
|
|
|
|
ETag() ETag
|
|
|
|
}
|
|
|
|
|
|
|
|
type wrapReader struct {
|
|
|
|
io.Reader
|
|
|
|
Tagger
|
|
|
|
}
|
|
|
|
|
|
|
|
// ETag returns the ETag of the underlying Tagger.
|
|
|
|
func (r *wrapReader) ETag() ETag {
|
|
|
|
if r.Tagger == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return r.Tagger.ETag()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wrap returns an io.Reader that reads from the wrapped
|
|
|
|
// io.Reader and implements the Tagger interaface.
|
|
|
|
//
|
|
|
|
// If content implements Tagger then the returned Reader
|
|
|
|
// returns ETag of the content. Otherwise, it returns
|
|
|
|
// nil as ETag.
|
|
|
|
//
|
|
|
|
// Wrap provides an adapter for io.Reader implemetations
|
|
|
|
// that don't implement the Tagger interface.
|
|
|
|
// It is mainly used to provide a high-level io.Reader
|
|
|
|
// access to the ETag computed by a low-level io.Reader:
|
|
|
|
//
|
|
|
|
// content := etag.NewReader(r.Body, nil)
|
|
|
|
//
|
|
|
|
// compressedContent := Compress(content)
|
|
|
|
// encryptedContent := Encrypt(compressedContent)
|
|
|
|
//
|
|
|
|
// // Now, we need an io.Reader that can access
|
|
|
|
// // the ETag computed over the content.
|
|
|
|
// reader := etag.Wrap(encryptedContent, content)
|
|
|
|
//
|
|
|
|
func Wrap(wrapped, content io.Reader) io.Reader {
|
|
|
|
if t, ok := content.(Tagger); ok {
|
|
|
|
return wrapReader{
|
|
|
|
Reader: wrapped,
|
|
|
|
Tagger: t,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return wrapReader{
|
|
|
|
Reader: wrapped,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// A Reader wraps an io.Reader and computes the
|
|
|
|
// MD5 checksum of the read content as ETag.
|
|
|
|
//
|
|
|
|
// Optionally, a Reader can also verify that
|
|
|
|
// the computed ETag matches an expected value.
|
|
|
|
// Therefore, it compares both ETags once the
|
|
|
|
// underlying io.Reader returns io.EOF.
|
|
|
|
// If the computed ETag does not match the
|
|
|
|
// expected ETag then Read returns a VerifyError.
|
|
|
|
//
|
|
|
|
// Reader implements the Tagger interface.
|
|
|
|
type Reader struct {
|
|
|
|
src io.Reader
|
|
|
|
|
|
|
|
md5 hash.Hash
|
|
|
|
checksum ETag
|
|
|
|
|
|
|
|
readN int64
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewReader returns a new Reader that computes the
|
|
|
|
// MD5 checksum of the content read from r as ETag.
|
|
|
|
//
|
|
|
|
// If the provided etag is not nil the returned
|
|
|
|
// Reader compares the etag with the computed
|
|
|
|
// MD5 sum once the r returns io.EOF.
|
|
|
|
func NewReader(r io.Reader, etag ETag) *Reader {
|
|
|
|
if er, ok := r.(*Reader); ok {
|
|
|
|
if er.readN == 0 && Equal(etag, er.checksum) {
|
|
|
|
return er
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return &Reader{
|
|
|
|
src: r,
|
|
|
|
md5: md5.New(),
|
|
|
|
checksum: etag,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read reads up to len(p) bytes from the underlying
|
|
|
|
// io.Reader as specified by the io.Reader interface.
|
|
|
|
func (r *Reader) Read(p []byte) (int, error) {
|
|
|
|
n, err := r.src.Read(p)
|
|
|
|
r.readN += int64(n)
|
|
|
|
r.md5.Write(p[:n])
|
|
|
|
|
|
|
|
if err == io.EOF && len(r.checksum) != 0 {
|
|
|
|
if etag := r.ETag(); !Equal(etag, r.checksum) {
|
|
|
|
return n, VerifyError{
|
|
|
|
Expected: r.checksum,
|
|
|
|
Computed: etag,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// ETag returns the ETag of all the content read
|
|
|
|
// so far. Reading more content changes the MD5
|
|
|
|
// checksum. Therefore, calling ETag multiple
|
|
|
|
// times may return different results.
|
|
|
|
func (r *Reader) ETag() ETag {
|
|
|
|
sum := r.md5.Sum(nil)
|
|
|
|
return ETag(sum)
|
|
|
|
}
|
|
|
|
|
|
|
|
// VerifyError is an error signaling that a
|
|
|
|
// computed ETag does not match an expected
|
|
|
|
// ETag.
|
|
|
|
type VerifyError struct {
|
|
|
|
Expected ETag
|
|
|
|
Computed ETag
|
|
|
|
}
|
|
|
|
|
|
|
|
func (v VerifyError) Error() string {
|
|
|
|
return fmt.Sprintf("etag: expected ETag %q does not match computed ETag %q", v.Expected, v.Computed)
|
|
|
|
}
|