minio/cmd/erasure.go
Klaus Post 4a007e3767
Prefer local disks when fetching data blocks (#9563)
If the requested server is part of the set this will always read 
from the local disk, even if the disk contains a parity shard. 
In default setup there is a 50% chance that at least 
one shard that otherwise would have been fetched remotely 
will be read locally instead.

It basically trades RPC call overhead for reed-solomon. 
On distributed localhost this seems to be fairly break-even, 
with a very small gain in throughput and latency. 
However on networked servers this should be a bigger

1MB objects, before:

```
Operation: GET. Concurrency: 32. Hosts: 4.

Requests considered: 76257:
 * Avg: 25ms 50%: 24ms 90%: 32ms 99%: 42ms Fastest: 7ms Slowest: 67ms
 * First Byte: Average: 23ms, Median: 22ms, Best: 5ms, Worst: 65ms

Throughput:
* Average: 1213.68 MiB/s, 1272.63 obj/s (59.948s, starting 14:45:44 CEST)
```

After:
```
Operation: GET. Concurrency: 32. Hosts: 4.

Requests considered: 78845:
 * Avg: 24ms 50%: 24ms 90%: 31ms 99%: 39ms Fastest: 8ms Slowest: 62ms
 * First Byte: Average: 22ms, Median: 21ms, Best: 6ms, Worst: 57ms

Throughput:
* Average: 1255.11 MiB/s, 1316.08 obj/s (59.938s, starting 14:43:58 CEST)
```

Bonus fix: Only ask for heal once on an object.
2020-05-26 16:47:23 -07:00

154 lines
4.1 KiB
Go

/*
* MinIO Cloud Storage, (C) 2017 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"sync"
"github.com/klauspost/reedsolomon"
"github.com/minio/minio/cmd/logger"
)
// Erasure - erasure encoding details.
type Erasure struct {
encoder func() reedsolomon.Encoder
dataBlocks, parityBlocks int
blockSize int64
}
// NewErasure creates a new ErasureStorage.
func NewErasure(ctx context.Context, dataBlocks, parityBlocks int, blockSize int64) (e Erasure, err error) {
e = Erasure{
dataBlocks: dataBlocks,
parityBlocks: parityBlocks,
blockSize: blockSize,
}
// Check the parameters for sanity now.
if dataBlocks <= 0 || parityBlocks <= 0 {
return e, reedsolomon.ErrInvShardNum
}
if dataBlocks+parityBlocks > 256 {
return e, reedsolomon.ErrMaxShardNum
}
// Encoder when needed.
var enc reedsolomon.Encoder
var once sync.Once
e.encoder = func() reedsolomon.Encoder {
once.Do(func() {
e, err := reedsolomon.New(dataBlocks, parityBlocks, reedsolomon.WithAutoGoroutines(int(e.ShardSize())))
if err != nil {
// Error conditions should be checked above.
panic(err)
}
enc = e
})
return enc
}
return
}
// EncodeData encodes the given data and returns the erasure-coded data.
// It returns an error if the erasure coding failed.
func (e *Erasure) EncodeData(ctx context.Context, data []byte) ([][]byte, error) {
if len(data) == 0 {
return make([][]byte, e.dataBlocks+e.parityBlocks), nil
}
encoded, err := e.encoder().Split(data)
if err != nil {
logger.LogIf(ctx, err)
return nil, err
}
if err = e.encoder().Encode(encoded); err != nil {
logger.LogIf(ctx, err)
return nil, err
}
return encoded, nil
}
// DecodeDataBlocks decodes the given erasure-coded data.
// It only decodes the data blocks but does not verify them.
// It returns an error if the decoding failed.
func (e *Erasure) DecodeDataBlocks(data [][]byte) error {
var isZero = 0
for _, b := range data[:] {
if len(b) == 0 {
isZero++
break
}
}
if isZero == 0 || isZero == len(data) {
// If all are zero, payload is 0 bytes.
return nil
}
return e.encoder().ReconstructData(data)
}
// DecodeDataAndParityBlocks decodes the given erasure-coded data and verifies it.
// It returns an error if the decoding failed.
func (e *Erasure) DecodeDataAndParityBlocks(ctx context.Context, data [][]byte) error {
needsReconstruction := false
for _, b := range data {
if b == nil {
needsReconstruction = true
break
}
}
if !needsReconstruction {
return nil
}
if err := e.encoder().Reconstruct(data); err != nil {
logger.LogIf(ctx, err)
return err
}
return nil
}
// ShardSize - returns actual shared size from erasure blockSize.
func (e *Erasure) ShardSize() int64 {
return ceilFrac(e.blockSize, int64(e.dataBlocks))
}
// ShardFileSize - returns final erasure size from original size.
func (e *Erasure) ShardFileSize(totalLength int64) int64 {
if totalLength == 0 {
return 0
}
if totalLength == -1 {
return -1
}
numShards := totalLength / e.blockSize
lastBlockSize := totalLength % int64(e.blockSize)
lastShardSize := ceilFrac(lastBlockSize, int64(e.dataBlocks))
return numShards*e.ShardSize() + lastShardSize
}
// ShardFileTillOffset - returns the effectiv eoffset where erasure reading begins.
func (e *Erasure) ShardFileTillOffset(startOffset, length, totalLength int64) int64 {
shardSize := e.ShardSize()
shardFileSize := e.ShardFileSize(totalLength)
endShard := (startOffset + int64(length)) / e.blockSize
tillOffset := endShard*shardSize + shardSize
if tillOffset > shardFileSize {
tillOffset = shardFileSize
}
return tillOffset
}