From ed676667d0932d9a5e7fc3c5641cf750b36af275 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Thu, 6 Oct 2016 21:57:42 -0700 Subject: [PATCH] vendor: update reedsolomon package with new changes. (#2870) - Cached inverse matrices for better reconstruct performance. - New error reconstruction required is returned, helpful in initiating healing. --- .../klauspost/reedsolomon/inversion_tree.go | 160 ++++++++++++++++++ .../klauspost/reedsolomon/reedsolomon.go | 83 +++++++-- vendor/vendor.json | 5 +- 3 files changed, 228 insertions(+), 20 deletions(-) create mode 100644 vendor/github.com/klauspost/reedsolomon/inversion_tree.go diff --git a/vendor/github.com/klauspost/reedsolomon/inversion_tree.go b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go new file mode 100644 index 000000000..c9d8ab2e7 --- /dev/null +++ b/vendor/github.com/klauspost/reedsolomon/inversion_tree.go @@ -0,0 +1,160 @@ +/** + * A thread-safe tree which caches inverted matrices. + * + * Copyright 2016, Peter Collins + */ + +package reedsolomon + +import ( + "errors" + "sync" +) + +// The tree uses a Reader-Writer mutex to make it thread-safe +// when accessing cached matrices and inserting new ones. +type inversionTree struct { + mutex *sync.RWMutex + root inversionNode +} + +type inversionNode struct { + matrix matrix + children []*inversionNode +} + +// newInversionTree initializes a tree for storing inverted matrices. +// Note that the root node is the identity matrix as it implies +// there were no errors with the original data. +func newInversionTree(dataShards, parityShards int) inversionTree { + identity, _ := identityMatrix(dataShards) + root := inversionNode{ + matrix: identity, + children: make([]*inversionNode, dataShards+parityShards), + } + return inversionTree{ + mutex: &sync.RWMutex{}, + root: root, + } +} + +// GetInvertedMatrix returns the cached inverted matrix or nil if it +// is not found in the tree keyed on the indices of invalid rows. +func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix { + // Lock the tree for reading before accessing the tree. + t.mutex.RLock() + defer t.mutex.RUnlock() + + // If no invalid indices were give we should return the root + // identity matrix. + if len(invalidIndices) == 0 { + return t.root.matrix + } + + // Recursively search for the inverted matrix in the tree, passing in + // 0 as the parent index as we start at the root of the tree. + return t.root.getInvertedMatrix(invalidIndices, 0) +} + +// errAlreadySet is returned if the root node matrix is overwritten +var errAlreadySet = errors.New("the root node identity matrix is already set") + +// InsertInvertedMatrix inserts a new inverted matrix into the tree +// keyed by the indices of invalid rows. The total number of shards +// is required for creating the proper length lists of child nodes for +// each node. +func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error { + // If no invalid indices were given then we are done because the + // root node is already set with the identity matrix. + if len(invalidIndices) == 0 { + return errAlreadySet + } + + if !matrix.IsSquare() { + return errNotSquare + } + + // Lock the tree for writing and reading before accessing the tree. + t.mutex.Lock() + defer t.mutex.Unlock() + + // Recursively create nodes for the inverted matrix in the tree until + // we reach the node to insert the matrix to. We start by passing in + // 0 as the parent index as we start at the root of the tree. + t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0) + + return nil +} + +func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix { + // Get the child node to search next from the list of children. The + // list of children starts relative to the parent index passed in + // because the indices of invalid rows is sorted (by default). As we + // search recursively, the first invalid index gets popped off the list, + // so when searching through the list of children, use that first invalid + // index to find the child node. + firstIndex := invalidIndices[0] + node := n.children[firstIndex-parent] + + // If the child node doesn't exist in the list yet, fail fast by + // returning, so we can construct and insert the proper inverted matrix. + if node == nil { + return nil + } + + // If there's more than one invalid index left in the list we should + // keep searching recursively. + if len(invalidIndices) > 1 { + // Search recursively on the child node by passing in the invalid indices + // with the first index popped off the front. Also the parent index to + // pass down is the first index plus one. + return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1) + } + // If there aren't any more invalid indices to search, we've found our + // node. Return it, however keep in mind that the matrix could still be + // nil because intermediary nodes in the tree are created sometimes with + // their inversion matrices uninitialized. + return node.matrix +} + +func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) { + // As above, get the child node to search next from the list of children. + // The list of children starts relative to the parent index passed in + // because the indices of invalid rows is sorted (by default). As we + // search recursively, the first invalid index gets popped off the list, + // so when searching through the list of children, use that first invalid + // index to find the child node. + firstIndex := invalidIndices[0] + node := n.children[firstIndex-parent] + + // If the child node doesn't exist in the list yet, create a new + // node because we have the writer lock and add it to the list + // of children. + if node == nil { + // Make the length of the list of children equal to the number + // of shards minus the first invalid index because the list of + // invalid indices is sorted, so only this length of errors + // are possible in the tree. + node = &inversionNode{ + children: make([]*inversionNode, shards-firstIndex), + } + // Insert the new node into the tree at the first index relative + // to the parent index that was given in this recursive call. + n.children[firstIndex-parent] = node + } + + // If there's more than one invalid index left in the list we should + // keep searching recursively in order to find the node to add our + // matrix. + if len(invalidIndices) > 1 { + // As above, search recursively on the child node by passing in + // the invalid indices with the first index popped off the front. + // Also the total number of shards and parent index are passed down + // which is equal to the first index plus one. + node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1) + } else { + // If there aren't any more invalid indices to search, we've found our + // node. Cache the inverted matrix in this node. + node.matrix = matrix + } +} diff --git a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go index b89a1dcd7..914ebe0ad 100644 --- a/vendor/github.com/klauspost/reedsolomon/reedsolomon.go +++ b/vendor/github.com/klauspost/reedsolomon/reedsolomon.go @@ -81,6 +81,7 @@ type reedSolomon struct { ParityShards int // Number of parity shards, should not be modified. Shards int // Total number of shards. Calculated, and should not be modified. m matrix + tree inversionTree parity [][]byte } @@ -128,6 +129,13 @@ func New(dataShards, parityShards int) (Encoder, error) { top, _ = top.Invert() r.m, _ = vm.Multiply(top) + // Inverted matrices are cached in a tree keyed by the indices + // of the invalid rows of the data to reconstruct. + // The inversion root node will have the identity matrix as + // its inversion matrix because it implies there are no errors + // with the original data. + r.tree = newInversionTree(dataShards, parityShards) + r.parity = make([][]byte, parityShards) for i := range r.parity { r.parity[i] = r.m[dataShards+i] @@ -380,36 +388,61 @@ func (r reedSolomon) Reconstruct(shards [][]byte) error { return ErrTooFewShards } - // Pull out the rows of the matrix that correspond to the - // shards that we have and build a square matrix. This - // matrix could be used to generate the shards that we have - // from the original data. - // - // Also, pull out an array holding just the shards that + // Pull out an array holding just the shards that // correspond to the rows of the submatrix. These shards // will be the input to the decoding process that re-creates // the missing data shards. - subMatrix, _ := newMatrix(r.DataShards, r.DataShards) + // + // Also, create an array of indices of the valid rows we do have + // and the invalid rows we don't have up until we have enough valid rows. subShards := make([][]byte, r.DataShards) + validIndices := make([]int, r.DataShards) + invalidIndices := make([]int, 0) subMatrixRow := 0 for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ { if len(shards[matrixRow]) != 0 { - for c := 0; c < r.DataShards; c++ { - subMatrix[subMatrixRow][c] = r.m[matrixRow][c] - } subShards[subMatrixRow] = shards[matrixRow] + validIndices[subMatrixRow] = matrixRow subMatrixRow++ + } else { + invalidIndices = append(invalidIndices, matrixRow) } } - // Invert the matrix, so we can go from the encoded shards - // back to the original data. Then pull out the row that - // generates the shard that we want to decode. Note that - // since this matrix maps back to the original data, it can - // be used to create a data shard, but not a parity shard. - dataDecodeMatrix, err := subMatrix.Invert() - if err != nil { - return err + // Attempt to get the cached inverted matrix out of the tree + // based on the indices of the invalid rows. + dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices) + + // If the inverted matrix isn't cached in the tree yet we must + // construct it ourselves and insert it into the tree for the + // future. In this way the inversion tree is lazily loaded. + if dataDecodeMatrix == nil { + // Pull out the rows of the matrix that correspond to the + // shards that we have and build a square matrix. This + // matrix could be used to generate the shards that we have + // from the original data. + subMatrix, _ := newMatrix(r.DataShards, r.DataShards) + for subMatrixRow, validIndex := range validIndices { + for c := 0; c < r.DataShards; c++ { + subMatrix[subMatrixRow][c] = r.m[validIndex][c] + } + } + // Invert the matrix, so we can go from the encoded shards + // back to the original data. Then pull out the row that + // generates the shard that we want to decode. Note that + // since this matrix maps back to the original data, it can + // be used to create a data shard, but not a parity shard. + dataDecodeMatrix, err = subMatrix.Invert() + if err != nil { + return err + } + + // Cache the inverted matrix in the tree for future use keyed on the + // indices of the invalid rows. + err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards) + if err != nil { + return err + } } // Re-create any data shards that were missing. @@ -487,12 +520,18 @@ func (r reedSolomon) Split(data []byte) ([][]byte, error) { return dst, nil } +// ErrReconstructRequired is returned if too few data shards are intact and a +// reconstruction is required before you can successfully join the shards. +var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil") + // Join the shards and write the data segment to dst. // // Only the data shards are considered. // You must supply the exact output size you want. +// // If there are to few shards given, ErrTooFewShards will be returned. // If the total data size is less than outSize, ErrShortData will be returned. +// If one or more required data shards are nil, ErrReconstructRequired will be returned. func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error { // Do we have enough shards? if len(shards) < r.DataShards { @@ -503,7 +542,15 @@ func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error { // Do we have enough data? size := 0 for _, shard := range shards { + if shard == nil { + return ErrReconstructRequired + } size += len(shard) + + // Do we have enough data already? + if size >= outSize { + break + } } if size < outSize { return ErrShortData diff --git a/vendor/vendor.json b/vendor/vendor.json index 3035ba843..ec3e7ced1 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -73,9 +73,10 @@ "revisionTime": "2016-01-06T11:44:51+01:00" }, { + "checksumSHA1": "XRii0aDqXZvztXflEB2EE9TRoks=", "path": "github.com/klauspost/reedsolomon", - "revision": "467733eb9c209042fb37d2074a5b6be33a529be0", - "revisionTime": "2016-07-06T21:06:00+02:00" + "revision": "c54154da9e35cab25232314cf69ab9d78447f9a5", + "revisionTime": "2016-09-12T19:31:07Z" }, { "checksumSHA1": "dNYxHiBLalTqluak2/Z8c3RsSEM=",