minio/pkg/etag/etag_test.go

226 lines
9.2 KiB
Go
Raw Normal View History

// Copyright (c) 2015-2021 MinIO, Inc.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This file is part of MinIO Object Storage stack
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
package etag
import (
"io"
"io/ioutil"
"net/http"
pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers.
2021-02-23 21:31:53 +01:00
"strings"
"testing"
)
var parseTests = []struct {
String string
ETag ETag
ShouldFail bool
}{
{String: "3b83ef96387f1465", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}}, // 0
{String: "3b83ef96387f14655fc854ddc3c6bd57", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 1
{String: `"3b83ef96387f14655fc854ddc3c6bd57"`, ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 2
{String: "ceb8853ddc5086cc4ab9e149f8f09c88-1", ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}}, // 3
{String: `"ceb8853ddc5086cc4ab9e149f8f09c88-2"`, ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}}, // 4
{ // 5
String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941",
ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65},
},
{String: `"3b83ef96387f14655fc854ddc3c6bd57`, ShouldFail: true}, // 6
{String: "ceb8853ddc5086cc4ab9e149f8f09c88-", ShouldFail: true}, // 7
{String: "ceb8853ddc5086cc4ab9e149f8f09c88-2a", ShouldFail: true}, // 8
{String: "ceb8853ddc5086cc4ab9e149f8f09c88-2-1", ShouldFail: true}, // 9
{String: "90402c78d2dccddee1e9e86222ce2c-1", ShouldFail: true}, // 10
{String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941-1", ShouldFail: true}, // 11
}
func TestParse(t *testing.T) {
for i, test := range parseTests {
etag, err := Parse(test.String)
if err == nil && test.ShouldFail {
t.Fatalf("Test %d: parse should have failed but succeeded", i)
}
if err != nil && !test.ShouldFail {
t.Fatalf("Test %d: failed to parse ETag %q: %v", i, test.String, err)
}
if !Equal(etag, test.ETag) {
t.Log([]byte(etag))
t.Fatalf("Test %d: ETags don't match", i)
}
}
}
var stringTests = []struct {
ETag ETag
String string
}{
{ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}, String: "3b83ef96387f1465"}, // 0
{ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}, String: "3b83ef96387f14655fc854ddc3c6bd57"}, // 1
{ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-1"}, // 2
{ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-2"}, // 3
{ // 4
ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65},
String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941",
},
}
func TestString(t *testing.T) {
for i, test := range stringTests {
s := test.ETag.String()
if s != test.String {
t.Fatalf("Test %d: got %s - want %s", i, s, test.String)
}
}
}
var equalTests = []struct {
A string
B string
Equal bool
}{
{A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57", Equal: true}, // 0
{A: "3b83ef96387f14655fc854ddc3c6bd57", B: `"3b83ef96387f14655fc854ddc3c6bd57"`, Equal: true}, // 1
{A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57-2", Equal: false}, // 2
{A: "3b83ef96387f14655fc854ddc3c6bd57", B: "ceb8853ddc5086cc4ab9e149f8f09c88", Equal: false}, // 3
}
func TestEqual(t *testing.T) {
for i, test := range equalTests {
A, err := Parse(test.A)
if err != nil {
t.Fatalf("Test %d: %v", i, err)
}
B, err := Parse(test.B)
if err != nil {
t.Fatalf("Test %d: %v", i, err)
}
if equal := Equal(A, B); equal != test.Equal {
t.Fatalf("Test %d: got %v - want %v", i, equal, test.Equal)
}
}
}
var readerTests = []struct { // Reference values computed by: echo <content> | md5sum
Content string
ETag ETag
}{
{
Content: "", ETag: ETag{212, 29, 140, 217, 143, 0, 178, 4, 233, 128, 9, 152, 236, 248, 66, 126},
},
{
Content: " ", ETag: ETag{114, 21, 238, 156, 125, 157, 194, 41, 210, 146, 26, 64, 232, 153, 236, 95},
},
{
Content: "Hello World", ETag: ETag{177, 10, 141, 177, 100, 224, 117, 65, 5, 183, 169, 155, 231, 46, 63, 229},
},
}
func TestReader(t *testing.T) {
for i, test := range readerTests {
reader := NewReader(strings.NewReader(test.Content), test.ETag)
if _, err := io.Copy(ioutil.Discard, reader); err != nil {
t.Fatalf("Test %d: read failed: %v", i, err)
}
if ETag := reader.ETag(); !Equal(ETag, test.ETag) {
t.Fatalf("Test %d: ETag mismatch: got %q - want %q", i, ETag, test.ETag)
}
}
}
var multipartTests = []struct { // Test cases have been generated using AWS S3
ETags []ETag
Multipart ETag
}{
{
ETags: []ETag{},
Multipart: ETag{},
},
{
ETags: []ETag{must("b10a8db164e0754105b7a99be72e3fe5")},
Multipart: must("7b976cc68452e003eec7cb0eb631a19a-1"),
},
{
ETags: []ETag{must("5f363e0e58a95f06cbe9bbc662c5dfb6"), must("5f363e0e58a95f06cbe9bbc662c5dfb6")},
Multipart: must("a7d414b9133d6483d9a1c4e04e856e3b-2"),
},
{
ETags: []ETag{must("5f363e0e58a95f06cbe9bbc662c5dfb6"), must("a096eb5968d607c2975fb2c4af9ab225"), must("b10a8db164e0754105b7a99be72e3fe5")},
Multipart: must("9a0d1febd9265f59f368ceb652770bc2-3"),
},
{ // Check that multipart ETags are ignored
ETags: []ETag{must("5f363e0e58a95f06cbe9bbc662c5dfb6"), must("5f363e0e58a95f06cbe9bbc662c5dfb6"), must("ceb8853ddc5086cc4ab9e149f8f09c88-1")},
Multipart: must("a7d414b9133d6483d9a1c4e04e856e3b-2"),
},
{ // Check that encrypted ETags are ignored
ETags: []ETag{
must("90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941"),
must("5f363e0e58a95f06cbe9bbc662c5dfb6"), must("5f363e0e58a95f06cbe9bbc662c5dfb6"),
},
Multipart: must("a7d414b9133d6483d9a1c4e04e856e3b-2"),
},
}
func TestMultipart(t *testing.T) {
for i, test := range multipartTests {
if multipart := Multipart(test.ETags...); !Equal(multipart, test.Multipart) {
t.Fatalf("Test %d: got %q - want %q", i, multipart, test.Multipart)
}
}
}
var fromContentMD5Tests = []struct {
Header http.Header
ETag ETag
ShouldFail bool
}{
{Header: http.Header{}, ETag: nil}, // 0
{Header: http.Header{"Content-Md5": []string{"1B2M2Y8AsgTpgAmY7PhCfg=="}}, ETag: must("d41d8cd98f00b204e9800998ecf8427e")}, // 1
{Header: http.Header{"Content-Md5": []string{"sQqNsWTgdUEFt6mb5y4/5Q=="}}, ETag: must("b10a8db164e0754105b7a99be72e3fe5")}, // 2
{Header: http.Header{"Content-MD5": []string{"1B2M2Y8AsgTpgAmY7PhCfg=="}}, ETag: nil}, // 3 (Content-MD5 vs Content-Md5)
{Header: http.Header{"Content-Md5": []string{"sQqNsWTgdUEFt6mb5y4/5Q==", "1B2M2Y8AsgTpgAmY7PhCfg=="}}, ETag: must("b10a8db164e0754105b7a99be72e3fe5")}, // 4
{Header: http.Header{"Content-Md5": []string{""}}, ShouldFail: true}, // 5 (empty value)
{Header: http.Header{"Content-Md5": []string{"", "sQqNsWTgdUEFt6mb5y4/5Q=="}}, ShouldFail: true}, // 6 (empty value)
{Header: http.Header{"Content-Md5": []string{"d41d8cd98f00b204e9800998ecf8427e"}}, ShouldFail: true}, // 7 (content-md5 is invalid b64 / of invalid length)
}
func TestFromContentMD5(t *testing.T) {
for i, test := range fromContentMD5Tests {
ETag, err := FromContentMD5(test.Header)
if err != nil && !test.ShouldFail {
t.Fatalf("Test %d: failed to convert Content-MD5 to ETag: %v", i, err)
}
if err == nil && test.ShouldFail {
t.Fatalf("Test %d: should have failed but succeeded", i)
}
if err == nil {
if !Equal(ETag, test.ETag) {
t.Fatalf("Test %d: got %q - want %q", i, ETag, test.ETag)
}
}
}
}
func must(s string) ETag {
t, err := Parse(s)
if err != nil {
panic(err)
}
return t
}