/* * Minio Cloud Storage, (C) 2016 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package main import ( "encoding/json" "fmt" "path" "sort" "sync" "time" ) const ( // Erasure related constants. erasureAlgorithmKlauspost = "klauspost/reedsolomon/vandermonde" erasureAlgorithmISAL = "isa-l/reedsolomon/cauchy" ) // objectPartInfo Info of each part kept in the multipart metadata // file after CompleteMultipartUpload() is called. type objectPartInfo struct { Number int `json:"number"` Name string `json:"name"` ETag string `json:"etag"` Size int64 `json:"size"` } // byObjectPartNumber is a collection satisfying sort.Interface. type byObjectPartNumber []objectPartInfo func (t byObjectPartNumber) Len() int { return len(t) } func (t byObjectPartNumber) Swap(i, j int) { t[i], t[j] = t[j], t[i] } func (t byObjectPartNumber) Less(i, j int) bool { return t[i].Number < t[j].Number } // checkSumInfo - carries checksums of individual part. type checkSumInfo struct { Name string `json:"name"` Algorithm string `json:"algorithm"` Hash string `json:"hash"` } // A xlMetaV1 represents a metadata header mapping keys to sets of values. type xlMetaV1 struct { Version string `json:"version"` Format string `json:"format"` Stat struct { Size int64 `json:"size"` ModTime time.Time `json:"modTime"` Version int64 `json:"version"` } `json:"stat"` Erasure struct { Algorithm string `json:"algorithm"` DataBlocks int `json:"data"` ParityBlocks int `json:"parity"` BlockSize int64 `json:"blockSize"` Index int `json:"index"` Distribution []int `json:"distribution"` Checksum []checkSumInfo `json:"checksum,omitempty"` } `json:"erasure"` Minio struct { Release string `json:"release"` } `json:"minio"` Meta map[string]string `json:"meta"` Parts []objectPartInfo `json:"parts,omitempty"` } // newXLMetaV1 - initializes new xlMetaV1. func newXLMetaV1(dataBlocks, parityBlocks int) (xlMeta xlMetaV1) { xlMeta = xlMetaV1{} xlMeta.Version = "1" xlMeta.Format = "xl" xlMeta.Minio.Release = minioReleaseTag xlMeta.Erasure.Algorithm = erasureAlgorithmKlauspost xlMeta.Erasure.DataBlocks = dataBlocks xlMeta.Erasure.ParityBlocks = parityBlocks xlMeta.Erasure.BlockSize = blockSizeV1 xlMeta.Erasure.Distribution = randInts(dataBlocks + parityBlocks) return xlMeta } // IsValid - is validate tells if the format is sane. func (m xlMetaV1) IsValid() bool { return m.Version == "1" && m.Format == "xl" } // ObjectPartIndex - returns the index of matching object part number. func (m xlMetaV1) ObjectPartIndex(partNumber int) (index int) { for i, part := range m.Parts { if partNumber == part.Number { index = i return index } } return -1 } // ObjectCheckIndex - returns the checksum for the part name from the checksum slice. func (m xlMetaV1) PartObjectChecksum(partNumber int) checkSumInfo { partName := fmt.Sprintf("object%d", partNumber) for _, checksum := range m.Erasure.Checksum { if checksum.Name == partName { return checksum } } return checkSumInfo{} } // AddObjectPart - add a new object part in order. func (m *xlMetaV1) AddObjectPart(partNumber int, partName string, partETag string, partSize int64) { partInfo := objectPartInfo{ Number: partNumber, Name: partName, ETag: partETag, Size: partSize, } // Update part info if it already exists. for i, part := range m.Parts { if partNumber == part.Number { m.Parts[i] = partInfo return } } // Proceed to include new part info. m.Parts = append(m.Parts, partInfo) // Parts in xlMeta should be in sorted order by part number. sort.Sort(byObjectPartNumber(m.Parts)) } // ObjectToPartOffset - translate offset of an object to offset of its individual part. func (m xlMetaV1) ObjectToPartOffset(offset int64) (partIndex int, partOffset int64, err error) { partOffset = offset // Seek until object offset maps to a particular part offset. for i, part := range m.Parts { partIndex = i // Last part can be of '0' bytes, treat it specially and // return right here. if part.Size == 0 { return partIndex, partOffset, nil } // Offset is smaller than size we have reached the proper part offset. if partOffset < part.Size { return partIndex, partOffset, nil } // Continue to towards the next part. partOffset -= part.Size } // Offset beyond the size of the object return InvalidRange. return 0, 0, InvalidRange{} } // pickValidXLMeta - picks one valid xlMeta content and returns from a // slice of xlmeta content. If no value is found this function panics // and dies. func pickValidXLMeta(xlMetas []xlMetaV1) xlMetaV1 { for _, xlMeta := range xlMetas { if xlMeta.IsValid() { return xlMeta } } panic("Unable to look for valid XL metadata content") } // readXLMetadata - returns the object metadata `xl.json` content from // one of the disks picked at random. func (xl xlObjects) readXLMetadata(bucket, object string) (xlMeta xlMetaV1, err error) { // Count for errors encountered. var xlJSONErrCount = 0 // Return the first successful lookup from a random list of disks. for xlJSONErrCount < len(xl.storageDisks) { disk := xl.getRandomDisk() // Choose a random disk on each attempt. var buffer []byte buffer, err = readAll(disk, bucket, path.Join(object, xlMetaJSONFile)) if err == nil { err = json.Unmarshal(buffer, &xlMeta) if err == nil { if xlMeta.IsValid() { return xlMeta, nil } err = errDataCorrupt } } xlJSONErrCount++ // Update error count. } return xlMetaV1{}, err } // renameXLMetadata - renames `xl.json` from source prefix to destination prefix. func (xl xlObjects) renameXLMetadata(srcBucket, srcPrefix, dstBucket, dstPrefix string) error { var wg = &sync.WaitGroup{} var mErrs = make([]error, len(xl.storageDisks)) srcJSONFile := path.Join(srcPrefix, xlMetaJSONFile) dstJSONFile := path.Join(dstPrefix, xlMetaJSONFile) // Rename `xl.json` to all disks in parallel. for index, disk := range xl.storageDisks { wg.Add(1) // Rename `xl.json` in a routine. go func(index int, disk StorageAPI) { defer wg.Done() // Renames `xl.json` from source prefix to destination prefix. rErr := disk.RenameFile(srcBucket, srcJSONFile, dstBucket, dstJSONFile) if rErr != nil { mErrs[index] = rErr return } // Delete any dangling directories. dErr := disk.DeleteFile(srcBucket, srcPrefix) if dErr != nil { mErrs[index] = dErr return } mErrs[index] = nil }(index, disk) } // Wait for all the routines. wg.Wait() // Return the first error. for _, err := range mErrs { if err == nil { continue } return err } return nil } // writeXLMetadata - writes `xl.json` to a single disk. func writeXLMetadata(disk StorageAPI, bucket, prefix string, xlMeta xlMetaV1) error { jsonFile := path.Join(prefix, xlMetaJSONFile) // Marshal json. metadataBytes, err := json.Marshal(&xlMeta) if err != nil { return err } // Persist marshalled data. n, err := disk.AppendFile(bucket, jsonFile, metadataBytes) if err != nil { return err } if n != int64(len(metadataBytes)) { return errUnexpected } return nil } // checkSumAlgorithm - get the algorithm required for checksum // verification for a given part. Allocates a new hash and returns. func checkSumAlgorithm(xlMeta xlMetaV1, partIdx int) string { partCheckSumInfo := xlMeta.PartObjectChecksum(partIdx) return partCheckSumInfo.Algorithm } // xlMetaPartBlockChecksums - get block checksums for a given part. func (xl xlObjects) metaPartBlockChecksums(xlMetas []xlMetaV1, partIdx int) (blockCheckSums []string) { for index := range xl.storageDisks { // Save the read checksums for a given part. blockCheckSums = append(blockCheckSums, xlMetas[index].PartObjectChecksum(partIdx).Hash) } return blockCheckSums } // writeUniqueXLMetadata - writes unique `xl.json` content for each disk in order. func (xl xlObjects) writeUniqueXLMetadata(bucket, prefix string, xlMetas []xlMetaV1) error { var wg = &sync.WaitGroup{} var mErrs = make([]error, len(xl.storageDisks)) // Start writing `xl.json` to all disks in parallel. for index, disk := range xl.storageDisks { wg.Add(1) // Write `xl.json` in a routine. go func(index int, disk StorageAPI) { defer wg.Done() // Pick one xlMeta for a disk at index. xlMetas[index].Erasure.Index = index + 1 // Write unique `xl.json` for a disk at index. if err := writeXLMetadata(disk, bucket, prefix, xlMetas[index]); err != nil { mErrs[index] = err return } mErrs[index] = nil }(index, disk) } // Wait for all the routines. wg.Wait() // Return the first error. for _, err := range mErrs { if err == nil { continue } return err } return nil } // writeXLMetadata - write `xl.json` on all disks in order. func (xl xlObjects) writeXLMetadata(bucket, prefix string, xlMeta xlMetaV1) error { var wg = &sync.WaitGroup{} var mErrs = make([]error, len(xl.storageDisks)) // Start writing `xl.json` to all disks in parallel. for index, disk := range xl.storageDisks { wg.Add(1) // Write `xl.json` in a routine. go func(index int, disk StorageAPI, metadata xlMetaV1) { defer wg.Done() // Save the disk order index. metadata.Erasure.Index = index + 1 // Write xl metadata. if err := writeXLMetadata(disk, bucket, prefix, metadata); err != nil { mErrs[index] = err return } mErrs[index] = nil }(index, disk, xlMeta) } // Wait for all the routines. wg.Wait() // Return the first error. for _, err := range mErrs { if err == nil { continue } return err } return nil }