/* * Minio Cloud Storage, (C) 2016 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cmd import ( "io" "github.com/minio/minio/pkg/errors" ) type errIdx struct { idx int err error } func (s ErasureStorage) readConcurrent(volume, path string, offset, length int64, verifiers []*BitrotVerifier) (buffers [][]byte, needsReconstruction bool, err error) { errChan := make(chan errIdx) stageBuffers := make([][]byte, len(s.disks)) buffers = make([][]byte, len(s.disks)) readDisk := func(i int) { stageBuffers[i] = make([]byte, length) disk := s.disks[i] if disk == OfflineDisk { errChan <- errIdx{i, errors.Trace(errDiskNotFound)} return } _, rerr := disk.ReadFile(volume, path, offset, stageBuffers[i], verifiers[i]) errChan <- errIdx{i, rerr} } var finishedCount, successCount, launchIndex int for ; launchIndex < s.dataBlocks; launchIndex++ { go readDisk(launchIndex) } for finishedCount < launchIndex { select { case errVal := <-errChan: finishedCount++ if errVal.err != nil { // TODO: meaningfully log the disk read error // A disk failed to return data, so we // request an additional disk if possible if launchIndex < s.dataBlocks+s.parityBlocks { needsReconstruction = true // requiredBlocks++ go readDisk(launchIndex) launchIndex++ } } else { successCount++ buffers[errVal.idx] = stageBuffers[errVal.idx] stageBuffers[errVal.idx] = nil } } } if successCount != s.dataBlocks { // Not enough disks returns data. err = errors.Trace(errXLReadQuorum) } return } // ReadFile reads as much data as requested from the file under the // given volume and path and writes the data to the provided writer. // The algorithm and the keys/checksums are used to verify the // integrity of the given file. ReadFile will read data from the given // offset up to the given length. If parts of the file are corrupted // ReadFile tries to reconstruct the data. func (s ErasureStorage) ReadFile(writer io.Writer, volume, path string, offset, length, totalLength int64, checksums [][]byte, algorithm BitrotAlgorithm, blocksize int64) (f ErasureFileInfo, err error) { if offset < 0 || length < 0 { return f, errors.Trace(errUnexpected) } if offset+length > totalLength { return f, errors.Trace(errUnexpected) } if !algorithm.Available() { return f, errors.Trace(errBitrotHashAlgoInvalid) } f.Checksums = make([][]byte, len(s.disks)) verifiers := make([]*BitrotVerifier, len(s.disks)) for i, disk := range s.disks { if disk == OfflineDisk { continue } verifiers[i] = NewBitrotVerifier(algorithm, checksums[i]) } chunksize := ceilFrac(blocksize, int64(s.dataBlocks)) // We read all whole-blocks of erasure coded data containing // the requested data range. // // The start index of the erasure coded block containing the // `offset` byte of data is: partDataStartIndex := (offset / blocksize) * chunksize // The start index of the erasure coded block containing the // (last) byte of data at the index `offset + length - 1` is: blockStartIndex := ((offset + length - 1) / blocksize) * chunksize // However, we need the end index of the e.c. block containing // the last byte - we need to check if that block is the last // block in the part (in that case, it may be have a different // chunk size) isLastBlock := (totalLength-1)/blocksize == (offset+length-1)/blocksize var partDataEndIndex int64 if isLastBlock { lastBlockChunkSize := chunksize if totalLength%blocksize != 0 { lastBlockChunkSize = ceilFrac(totalLength%blocksize, int64(s.dataBlocks)) } partDataEndIndex = blockStartIndex + lastBlockChunkSize - 1 } else { partDataEndIndex = blockStartIndex + chunksize - 1 } // Thus, the length of data to be read from the part file(s) is: partDataLength := partDataEndIndex - partDataStartIndex + 1 // The calculation above does not apply when length == 0: if length == 0 { partDataLength = 0 } var buffers [][]byte var needsReconstruction bool buffers, needsReconstruction, err = s.readConcurrent(volume, path, partDataStartIndex, partDataLength, verifiers) if err != nil { // Could not read enough disks. return } numChunks := ceilFrac(partDataLength, chunksize) blocks := make([][]byte, len(s.disks)) if needsReconstruction && numChunks > 1 { // Allocate once for all the equal length blocks. The // last block may have a different length - allocation // for this happens inside the for loop below. for i := range blocks { if len(buffers[i]) == 0 { blocks[i] = make([]byte, chunksize) } } } var buffOffset int64 for chunkNumber := int64(0); chunkNumber < numChunks; chunkNumber++ { if chunkNumber == numChunks-1 && partDataLength%chunksize != 0 { chunksize = partDataLength % chunksize // We allocate again as the last chunk has a // different size. for i := range blocks { if len(buffers[i]) == 0 { blocks[i] = make([]byte, chunksize) } } } for i := range blocks { if len(buffers[i]) == 0 { blocks[i] = blocks[i][0:0] } } for i := range blocks { if len(buffers[i]) != 0 { blocks[i] = buffers[i][buffOffset : buffOffset+chunksize] } } buffOffset += chunksize if needsReconstruction { if err = s.ErasureDecodeDataBlocks(blocks); err != nil { return f, errors.Trace(err) } } var writeStart int64 if chunkNumber == 0 { writeStart = offset % blocksize } writeLength := blocksize - writeStart if chunkNumber == numChunks-1 { lastBlockLength := (offset + length) % blocksize if lastBlockLength != 0 { writeLength = lastBlockLength - writeStart } } n, err := writeDataBlocks(writer, blocks, s.dataBlocks, writeStart, writeLength) if err != nil { return f, err } f.Size += n } f.Algorithm = algorithm for i, disk := range s.disks { if disk == OfflineDisk || buffers[i] == nil { continue } f.Checksums[i] = verifiers[i].Sum(nil) } return f, nil }